//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
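//
// For example, "reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());" below
// declares RAX as save-on-call under both the Java and C calling conventions,
// spilled/reloaded as an integer (Op_RegI), with hardware encoding 0.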

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.

reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
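// (In chunk0 below, the scratch registers R10 and R11 come first and the
// stack pointer RSP comes last, reflecting this heuristic.)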

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8, R8_H,
                   R9, R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters
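//
// For example, in the definitions below XMM0 names word a (the lowest
// 32 bits) of the first register, and XMM0c (xmm0->as_VMReg()->next(2))
// names its third 32-bit word.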

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
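// (The flags register is never spilled through a VMReg, hence the 0 ideal
// register type and the VMRegImpl::Bad() argument above.)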

// AVX3 Mask Registers.
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8, R8_H,
                  R9, R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
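// The mask-based classes above defer to register masks (e.g. _INT_REG_mask)
// which are assumed to be computed at VM startup, so registers unavailable
// in the current configuration (such as the APX extended GPRs R16-R31) can
// be excluded dynamically.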
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
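
// reg_class_dynamic selects between its two static operands at runtime: the
// first (EVEX) class when the trailing predicate holds, otherwise the second
// (legacy) class. The same pattern applies to the double and vector classes
// below.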
1115
// Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
// Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre-EVEX 32-bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
// Class for EVEX 32-bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre-EVEX 64-bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for EVEX 64-bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre-EVEX 128-bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for EVEX 128-bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre-EVEX 256-bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for EVEX 256-bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for EVEX 512-bit vector registers (XMM0-XMM31)
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512-bit vector registers (legacy encoding, XMM0-XMM15 only)
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
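  // True iff both bounds of the node's long type fit in a signed 32-bit
  // immediate; a bound of min_jlong/max_jlong is treated as "unbounded" and
  // accepted. E.g. a type of [0, 1000] qualifies, and so does [min_jlong, 100]
  // (the unbounded side is ignored), while [0, 1L << 40] does not.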
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
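  // R12 doubles as the compressed-oops heap base register, so it must be
  // carved out of the allocatable masks built in reg_mask_init() below.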
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper encodes in 3 bytes (0xC5 0xF8 0x77)
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
  int offset = 5; // 5 bytes from start of call to where return address points: 0xE8 opcode + 4-byte displacement
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
  int offset = 15; // 15 bytes from start of call to where return address points: 10-byte movq of the inline-cache value into rax + 5-byte call
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
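  // Produce a three-way compare result in dst from the flags set by
  // ucomis[s/d]: unordered (PF=1) and below (CF=1) both keep the initial -1,
  // equal leaves 0, and above becomes +1 via setcc(notEqual).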
1702 Label done;
1703 __ movl(dst, -1);
1704 __ jcc(Assembler::parity, done);
1705 __ jcc(Assembler::below, done);
1706 __ setcc(Assembler::notEqual, dst);
1707 __ bind(done);
1708 }
1709
1710 // Math.min() # Math.max()
1711 // --------------------------
1712 // ucomis[s/d] #
1713 // ja -> b # a
1714 // jp -> NaN # NaN
1715 // jb -> a # b
1716 // je #
1717 // |-jz -> a | b # a & b
1718 // | -> a #
1719 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1720 XMMRegister a, XMMRegister b,
1721 XMMRegister xmmt, Register rt,
1722 bool min, bool single) {
1723
1724 Label nan, zero, below, above, done;
1725
1726 if (single)
1727 __ ucomiss(a, b);
1728 else
1729 __ ucomisd(a, b);
1730
1731 if (dst->encoding() != (min ? b : a)->encoding())
1732 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1733 else
1734 __ jccb(Assembler::above, done);
1735
1736 __ jccb(Assembler::parity, nan); // PF=1
1737 __ jccb(Assembler::below, below); // CF=1
1738
1739 // equal
1740 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1741 if (single) {
1742 __ ucomiss(a, xmmt);
1743 __ jccb(Assembler::equal, zero);
1744
1745 __ movflt(dst, a);
1746 __ jmp(done);
1747 }
1748 else {
1749 __ ucomisd(a, xmmt);
1750 __ jccb(Assembler::equal, zero);
1751
1752 __ movdbl(dst, a);
1753 __ jmp(done);
1754 }
1755
1756 __ bind(zero);
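  // The operands compared equal and are zero, so they may be +0.0 and -0.0,
  // which ucomis treats as equal. ORing the bit patterns yields -0.0 if
  // either input is -0.0 (correct for min); ANDing yields +0.0 unless both
  // are -0.0 (correct for max).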
1757 if (min)
1758 __ vpor(dst, a, b, Assembler::AVX_128bit);
1759 else
1760 __ vpand(dst, a, b, Assembler::AVX_128bit);
1761
1762 __ jmp(done);
1763
1764 __ bind(above);
1765 if (single)
1766 __ movflt(dst, min ? b : a);
1767 else
1768 __ movdbl(dst, min ? b : a);
1769
1770 __ jmp(done);
1771
1772 __ bind(nan);
1773 if (single) {
1774 __ movl(rt, 0x7fc00000); // Float.NaN
1775 __ movdl(dst, rt);
1776 }
1777 else {
1778 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1779 __ movdq(dst, rt);
1780 }
1781 __ jmp(done);
1782
1783 __ bind(below);
1784 if (single)
1785 __ movflt(dst, min ? a : b);
1786 else
1787 __ movdbl(dst, min ? a : b);
1788
1789 __ bind(done);
1790 }
1791
1792 //=============================================================================
1793 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1794
1795 int ConstantTable::calculate_table_base_offset() const {
1796 return 0; // absolute addressing, no offset
1797 }
1798
1799 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1800 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1801 ShouldNotReachHere();
1802 }
1803
1804 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1805 // Empty encoding
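  // On x86_64 the constant table is reached with absolute/RIP-relative
  // addressing (see calculate_table_base_offset above), so no base register
  // needs to be materialized and no code is emitted.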
1806 }
1807
1808 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1809 return 0;
1810 }
1811
1812 #ifndef PRODUCT
1813 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1814 st->print("# MachConstantBaseNode (empty encoding)");
1815 }
1816 #endif
1817
1818
1819 //=============================================================================
1820 #ifndef PRODUCT
1821 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1822 Compile* C = ra_->C;
1823
1824 int framesize = C->output()->frame_size_in_bytes();
1825 int bangsize = C->output()->bang_size_in_bytes();
1826 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1827 // Remove wordSize for return addr which is already pushed.
1828 framesize -= wordSize;
1829
1830 if (C->output()->need_stack_bang(bangsize)) {
1831 framesize -= wordSize;
1832 st->print("# stack bang (%d bytes)", bangsize);
1833 st->print("\n\t");
1834 st->print("pushq rbp\t# Save rbp");
1835 if (PreserveFramePointer) {
1836 st->print("\n\t");
1837 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1838 }
1839 if (framesize) {
1840 st->print("\n\t");
1841 st->print("subq rsp, #%d\t# Create frame",framesize);
1842 }
1843 } else {
1844 st->print("subq rsp, #%d\t# Create frame",framesize);
1845 st->print("\n\t");
1846 framesize -= wordSize;
1847 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1848 if (PreserveFramePointer) {
1849 st->print("\n\t");
1850 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1851 if (framesize > 0) {
1852 st->print("\n\t");
1853 st->print("addq rbp, #%d", framesize);
1854 }
1855 }
1856 }
1857
1858 if (VerifyStackAtCalls) {
1859 st->print("\n\t");
1860 framesize -= wordSize;
1861 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1862 #ifdef ASSERT
1863 st->print("\n\t");
1864 st->print("# stack alignment check");
1865 #endif
1866 }
1867 if (C->stub_function() != nullptr) {
1868 st->print("\n\t");
1869 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1870 st->print("\n\t");
1871 st->print("je fast_entry\t");
1872 st->print("\n\t");
1873 st->print("call #nmethod_entry_barrier_stub\t");
1874 st->print("\n\tfast_entry:");
1875 }
1876 st->cr();
1877 }
1878 #endif
1879
1880 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1881 Compile* C = ra_->C;
1882
1883 int framesize = C->output()->frame_size_in_bytes();
1884 int bangsize = C->output()->bang_size_in_bytes();
1885
1886 if (C->clinit_barrier_on_entry()) {
1887 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1888 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1889
1890 Label L_skip_barrier;
1891 Register klass = rscratch1;
1892
1893 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1894 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1895
1896 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1897
1898 __ bind(L_skip_barrier);
1899 }
1900
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
1902
1903 C->output()->set_frame_complete(__ offset());
1904
1905 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because constant table users
    // might be emitted before MachConstantBaseNode.
1908 ConstantTable& constant_table = C->output()->constant_table();
1909 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1910 }
1911 }
1912
1913 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1914 {
1915 return MachNode::size(ra_); // too many variables; just compute it
1916 // the hard way
1917 }
1918
1919 int MachPrologNode::reloc() const
1920 {
1921 return 0; // a large enough number
1922 }
1923
1924 //=============================================================================
1925 #ifndef PRODUCT
1926 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1927 {
1928 Compile* C = ra_->C;
1929 if (generate_vzeroupper(C)) {
1930 st->print("vzeroupper");
1931 st->cr(); st->print("\t");
1932 }
1933
1934 int framesize = C->output()->frame_size_in_bytes();
1935 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1936 // Remove word for return adr already pushed
1937 // and RBP
1938 framesize -= 2*wordSize;
1939
1940 if (framesize) {
1941 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1942 st->print("\t");
1943 }
1944
1945 st->print_cr("popq rbp");
1946 if (do_polling() && C->is_method_compilation()) {
1947 st->print("\t");
1948 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1949 "ja #safepoint_stub\t"
1950 "# Safepoint: poll for GC");
1951 }
1952 }
1953 #endif
1954
1955 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1956 {
1957 Compile* C = ra_->C;
1958
1959 if (generate_vzeroupper(C)) {
1960 // Clear upper bits of YMM registers when current compiled code uses
1961 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1962 __ vzeroupper();
1963 }
1964
1965 int framesize = C->output()->frame_size_in_bytes();
1966 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1967 // Remove word for return adr already pushed
1968 // and RBP
1969 framesize -= 2*wordSize;
1970
1971 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1972
1973 if (framesize) {
1974 __ addq(rsp, framesize);
1975 }
1976
1977 __ popq(rbp);
1978
1979 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1980 __ reserved_stack_check();
1981 }
1982
1983 if (do_polling() && C->is_method_compilation()) {
1984 Label dummy_label;
1985 Label* code_stub = &dummy_label;
1986 if (!C->output()->in_scratch_emit_size()) {
1987 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1988 C->output()->add_stub(stub);
1989 code_stub = &stub->entry();
1990 }
1991 __ relocate(relocInfo::poll_return_type);
1992 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1993 }
1994 }
1995
1996 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1997 {
1998 return MachNode::size(ra_); // too many variables; just compute it
1999 // the hard way
2000 }
2001
2002 int MachEpilogNode::reloc() const
2003 {
2004 return 2; // a large enough number
2005 }
2006
2007 const Pipeline* MachEpilogNode::pipeline() const
2008 {
2009 return MachNode::pipeline_class();
2010 }
2011
2012 //=============================================================================
2013
2014 enum RC {
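  // Coarse register classes used to dispatch the spill-copy code below:
  // general purpose (rc_int), AVX-512 opmask (rc_kreg), XMM (rc_float),
  // or a stack slot (rc_stack).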
2015 rc_bad,
2016 rc_int,
2017 rc_kreg,
2018 rc_float,
2019 rc_stack
2020 };
2021
2022 static enum RC rc_class(OptoReg::Name reg)
2023 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2025
2026 if (OptoReg::is_stack(reg)) return rc_stack;
2027
2028 VMReg r = OptoReg::as_VMReg(reg);
2029
2030 if (r->is_Register()) return rc_int;
2031
2032 if (r->is_KRegister()) return rc_kreg;
2033
2034 assert(r->is_XMMRegister(), "must be");
2035 return rc_float;
2036 }
2037
2038 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2039 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2040 int src_hi, int dst_hi, uint ireg, outputStream* st);
2041
2042 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2043 int stack_offset, int reg, uint ireg, outputStream* st);
2044
2045 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2046 int dst_offset, uint ireg, outputStream* st) {
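  // Where a scratch register is needed (VecS uses rax, VecY/VecZ use xmm0),
  // its old value is parked in a slot just below rsp around the move;
  // VecD/VecX copy directly with pushq/popq.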
2047 if (masm) {
2048 switch (ireg) {
2049 case Op_VecS:
2050 __ movq(Address(rsp, -8), rax);
2051 __ movl(rax, Address(rsp, src_offset));
2052 __ movl(Address(rsp, dst_offset), rax);
2053 __ movq(rax, Address(rsp, -8));
2054 break;
2055 case Op_VecD:
2056 __ pushq(Address(rsp, src_offset));
2057 __ popq (Address(rsp, dst_offset));
2058 break;
2059 case Op_VecX:
2060 __ pushq(Address(rsp, src_offset));
2061 __ popq (Address(rsp, dst_offset));
2062 __ pushq(Address(rsp, src_offset+8));
2063 __ popq (Address(rsp, dst_offset+8));
2064 break;
2065 case Op_VecY:
2066 __ vmovdqu(Address(rsp, -32), xmm0);
2067 __ vmovdqu(xmm0, Address(rsp, src_offset));
2068 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2069 __ vmovdqu(xmm0, Address(rsp, -32));
2070 break;
2071 case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, src_offset), Assembler::AVX_512bit);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, Assembler::AVX_512bit);
      __ evmovdquq(xmm0, Address(rsp, -64), Assembler::AVX_512bit);
2076 break;
2077 default:
2078 ShouldNotReachHere();
2079 }
2080 #ifndef PRODUCT
2081 } else {
2082 switch (ireg) {
2083 case Op_VecS:
2084 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2085 "movl rax, [rsp + #%d]\n\t"
2086 "movl [rsp + #%d], rax\n\t"
2087 "movq rax, [rsp - #8]",
2088 src_offset, dst_offset);
2089 break;
2090 case Op_VecD:
2091 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2092 "popq [rsp + #%d]",
2093 src_offset, dst_offset);
2094 break;
2095 case Op_VecX:
2096 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2097 "popq [rsp + #%d]\n\t"
2098 "pushq [rsp + #%d]\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset, src_offset+8, dst_offset+8);
2101 break;
2102 case Op_VecY:
2103 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2104 "vmovdqu xmm0, [rsp + #%d]\n\t"
2105 "vmovdqu [rsp + #%d], xmm0\n\t"
2106 "vmovdqu xmm0, [rsp - #32]",
2107 src_offset, dst_offset);
2108 break;
2109 case Op_VecZ:
2110 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #64]",
2114 src_offset, dst_offset);
2115 break;
2116 default:
2117 ShouldNotReachHere();
2118 }
2119 #endif
2120 }
2121 }
2122
2123 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2124 PhaseRegAlloc* ra_,
2125 bool do_size,
2126 outputStream* st) const {
2127 assert(masm != nullptr || st != nullptr, "sanity");
2128 // Get registers to move
2129 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2130 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2131 OptoReg::Name dst_second = ra_->get_reg_second(this);
2132 OptoReg::Name dst_first = ra_->get_reg_first(this);
2133
2134 enum RC src_second_rc = rc_class(src_second);
2135 enum RC src_first_rc = rc_class(src_first);
2136 enum RC dst_second_rc = rc_class(dst_second);
2137 enum RC dst_first_rc = rc_class(dst_first);
2138
2139 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2140 "must move at least 1 register" );
2141
2142 if (src_first == dst_first && src_second == dst_second) {
2143 // Self copy, no move
2144 return 0;
2145 }
2146 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2147 uint ireg = ideal_reg();
2148 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2149 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2151 // mem -> mem
2152 int src_offset = ra_->reg2offset(src_first);
2153 int dst_offset = ra_->reg2offset(dst_first);
2154 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float) {
2156 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
2158 int stack_offset = ra_->reg2offset(dst_first);
2159 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) {
2161 int stack_offset = ra_->reg2offset(src_first);
2162 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2163 } else {
2164 ShouldNotReachHere();
2165 }
2166 return 0;
2167 }
2168 if (src_first_rc == rc_stack) {
2169 // mem ->
2170 if (dst_first_rc == rc_stack) {
2171 // mem -> mem
2172 assert(src_second != dst_first, "overlap");
2173 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2174 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2175 // 64-bit
2176 int src_offset = ra_->reg2offset(src_first);
2177 int dst_offset = ra_->reg2offset(dst_first);
2178 if (masm) {
2179 __ pushq(Address(rsp, src_offset));
2180 __ popq (Address(rsp, dst_offset));
2181 #ifndef PRODUCT
2182 } else {
2183 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2184 "popq [rsp + #%d]",
2185 src_offset, dst_offset);
2186 #endif
2187 }
2188 } else {
2189 // 32-bit
2190 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2191 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2192 // No pushl/popl, so:
2193 int src_offset = ra_->reg2offset(src_first);
2194 int dst_offset = ra_->reg2offset(dst_first);
2195 if (masm) {
2196 __ movq(Address(rsp, -8), rax);
2197 __ movl(rax, Address(rsp, src_offset));
2198 __ movl(Address(rsp, dst_offset), rax);
2199 __ movq(rax, Address(rsp, -8));
2200 #ifndef PRODUCT
2201 } else {
2202 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2203 "movl rax, [rsp + #%d]\n\t"
2204 "movl [rsp + #%d], rax\n\t"
2205 "movq rax, [rsp - #8]",
2206 src_offset, dst_offset);
2207 #endif
2208 }
2209 }
2210 return 0;
2211 } else if (dst_first_rc == rc_int) {
2212 // mem -> gpr
2213 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2214 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2215 // 64-bit
2216 int offset = ra_->reg2offset(src_first);
2217 if (masm) {
2218 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2219 #ifndef PRODUCT
2220 } else {
2221 st->print("movq %s, [rsp + #%d]\t# spill",
2222 Matcher::regName[dst_first],
2223 offset);
2224 #endif
2225 }
2226 } else {
2227 // 32-bit
2228 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2229 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2230 int offset = ra_->reg2offset(src_first);
2231 if (masm) {
2232 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2233 #ifndef PRODUCT
2234 } else {
2235 st->print("movl %s, [rsp + #%d]\t# spill",
2236 Matcher::regName[dst_first],
2237 offset);
2238 #endif
2239 }
2240 }
2241 return 0;
2242 } else if (dst_first_rc == rc_float) {
2243 // mem-> xmm
2244 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2245 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2246 // 64-bit
2247 int offset = ra_->reg2offset(src_first);
2248 if (masm) {
2249 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2250 #ifndef PRODUCT
2251 } else {
2252 st->print("%s %s, [rsp + #%d]\t# spill",
2253 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2254 Matcher::regName[dst_first],
2255 offset);
2256 #endif
2257 }
2258 } else {
2259 // 32-bit
2260 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2261 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2262 int offset = ra_->reg2offset(src_first);
2263 if (masm) {
2264 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2265 #ifndef PRODUCT
2266 } else {
2267 st->print("movss %s, [rsp + #%d]\t# spill",
2268 Matcher::regName[dst_first],
2269 offset);
2270 #endif
2271 }
2272 }
2273 return 0;
2274 } else if (dst_first_rc == rc_kreg) {
2275 // mem -> kreg
2276 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2277 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2278 // 64-bit
2279 int offset = ra_->reg2offset(src_first);
2280 if (masm) {
2281 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2282 #ifndef PRODUCT
2283 } else {
2284 st->print("kmovq %s, [rsp + #%d]\t# spill",
2285 Matcher::regName[dst_first],
2286 offset);
2287 #endif
2288 }
2289 }
2290 return 0;
2291 }
2292 } else if (src_first_rc == rc_int) {
2293 // gpr ->
2294 if (dst_first_rc == rc_stack) {
2295 // gpr -> mem
2296 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2297 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2298 // 64-bit
2299 int offset = ra_->reg2offset(dst_first);
2300 if (masm) {
2301 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2302 #ifndef PRODUCT
2303 } else {
2304 st->print("movq [rsp + #%d], %s\t# spill",
2305 offset,
2306 Matcher::regName[src_first]);
2307 #endif
2308 }
2309 } else {
2310 // 32-bit
2311 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2312 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2313 int offset = ra_->reg2offset(dst_first);
2314 if (masm) {
2315 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2316 #ifndef PRODUCT
2317 } else {
2318 st->print("movl [rsp + #%d], %s\t# spill",
2319 offset,
2320 Matcher::regName[src_first]);
2321 #endif
2322 }
2323 }
2324 return 0;
2325 } else if (dst_first_rc == rc_int) {
2326 // gpr -> gpr
2327 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2328 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2329 // 64-bit
2330 if (masm) {
2331 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2332 as_Register(Matcher::_regEncode[src_first]));
2333 #ifndef PRODUCT
2334 } else {
2335 st->print("movq %s, %s\t# spill",
2336 Matcher::regName[dst_first],
2337 Matcher::regName[src_first]);
2338 #endif
2339 }
2340 return 0;
2341 } else {
2342 // 32-bit
2343 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2344 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2345 if (masm) {
2346 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2347 as_Register(Matcher::_regEncode[src_first]));
2348 #ifndef PRODUCT
2349 } else {
2350 st->print("movl %s, %s\t# spill",
2351 Matcher::regName[dst_first],
2352 Matcher::regName[src_first]);
2353 #endif
2354 }
2355 return 0;
2356 }
2357 } else if (dst_first_rc == rc_float) {
2358 // gpr -> xmm
2359 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2360 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2361 // 64-bit
2362 if (masm) {
2363 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2364 #ifndef PRODUCT
2365 } else {
2366 st->print("movdq %s, %s\t# spill",
2367 Matcher::regName[dst_first],
2368 Matcher::regName[src_first]);
2369 #endif
2370 }
2371 } else {
2372 // 32-bit
2373 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2374 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2375 if (masm) {
2376 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2377 #ifndef PRODUCT
2378 } else {
2379 st->print("movdl %s, %s\t# spill",
2380 Matcher::regName[dst_first],
2381 Matcher::regName[src_first]);
2382 #endif
2383 }
2384 }
2385 return 0;
2386 } else if (dst_first_rc == rc_kreg) {
2387 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2388 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2389 // 64-bit
2390 if (masm) {
2391 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2392 #ifndef PRODUCT
2393 } else {
2394 st->print("kmovq %s, %s\t# spill",
2395 Matcher::regName[dst_first],
2396 Matcher::regName[src_first]);
2397 #endif
2398 }
2399 }
2400 Unimplemented();
2401 return 0;
2402 }
2403 } else if (src_first_rc == rc_float) {
2404 // xmm ->
2405 if (dst_first_rc == rc_stack) {
2406 // xmm -> mem
2407 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2408 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2409 // 64-bit
2410 int offset = ra_->reg2offset(dst_first);
2411 if (masm) {
2412 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2413 #ifndef PRODUCT
2414 } else {
2415 st->print("movsd [rsp + #%d], %s\t# spill",
2416 offset,
2417 Matcher::regName[src_first]);
2418 #endif
2419 }
2420 } else {
2421 // 32-bit
2422 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2423 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2424 int offset = ra_->reg2offset(dst_first);
2425 if (masm) {
2426 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2427 #ifndef PRODUCT
2428 } else {
2429 st->print("movss [rsp + #%d], %s\t# spill",
2430 offset,
2431 Matcher::regName[src_first]);
2432 #endif
2433 }
2434 }
2435 return 0;
2436 } else if (dst_first_rc == rc_int) {
2437 // xmm -> gpr
2438 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2439 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2440 // 64-bit
2441 if (masm) {
2442 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2443 #ifndef PRODUCT
2444 } else {
2445 st->print("movdq %s, %s\t# spill",
2446 Matcher::regName[dst_first],
2447 Matcher::regName[src_first]);
2448 #endif
2449 }
2450 } else {
2451 // 32-bit
2452 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2453 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2454 if (masm) {
2455 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2456 #ifndef PRODUCT
2457 } else {
2458 st->print("movdl %s, %s\t# spill",
2459 Matcher::regName[dst_first],
2460 Matcher::regName[src_first]);
2461 #endif
2462 }
2463 }
2464 return 0;
2465 } else if (dst_first_rc == rc_float) {
2466 // xmm -> xmm
2467 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2468 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2469 // 64-bit
2470 if (masm) {
2471 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2472 #ifndef PRODUCT
2473 } else {
2474 st->print("%s %s, %s\t# spill",
2475 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2476 Matcher::regName[dst_first],
2477 Matcher::regName[src_first]);
2478 #endif
2479 }
2480 } else {
2481 // 32-bit
2482 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2483 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2484 if (masm) {
2485 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2486 #ifndef PRODUCT
2487 } else {
2488 st->print("%s %s, %s\t# spill",
2489 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2490 Matcher::regName[dst_first],
2491 Matcher::regName[src_first]);
2492 #endif
2493 }
2494 }
2495 return 0;
2496 } else if (dst_first_rc == rc_kreg) {
2497 assert(false, "Illegal spilling");
2498 return 0;
2499 }
2500 } else if (src_first_rc == rc_kreg) {
2501 if (dst_first_rc == rc_stack) {
    // kreg -> mem
2503 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2504 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2505 // 64-bit
2506 int offset = ra_->reg2offset(dst_first);
2507 if (masm) {
2508 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2509 #ifndef PRODUCT
2510 } else {
2511 st->print("kmovq [rsp + #%d] , %s\t# spill",
2512 offset,
2513 Matcher::regName[src_first]);
2514 #endif
2515 }
2516 }
2517 return 0;
2518 } else if (dst_first_rc == rc_int) {
2519 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2520 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2521 // 64-bit
2522 if (masm) {
2523 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2524 #ifndef PRODUCT
2525 } else {
2526 st->print("kmovq %s, %s\t# spill",
2527 Matcher::regName[dst_first],
2528 Matcher::regName[src_first]);
2529 #endif
2530 }
2531 }
2532 Unimplemented();
2533 return 0;
2534 } else if (dst_first_rc == rc_kreg) {
2535 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2536 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2537 // 64-bit
2538 if (masm) {
2539 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2540 #ifndef PRODUCT
2541 } else {
2542 st->print("kmovq %s, %s\t# spill",
2543 Matcher::regName[dst_first],
2544 Matcher::regName[src_first]);
2545 #endif
2546 }
2547 }
2548 return 0;
2549 } else if (dst_first_rc == rc_float) {
2550 assert(false, "Illegal spill");
2551 return 0;
2552 }
2553 }
2554
2555 assert(0," foo ");
2556 Unimplemented();
2557 return 0;
2558 }
2559
2560 #ifndef PRODUCT
2561 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2562 implementation(nullptr, ra_, false, st);
2563 }
2564 #endif
2565
2566 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2567 implementation(masm, ra_, false, nullptr);
2568 }
2569
2570 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2571 return MachNode::size(ra_);
2572 }
2573
2574 //=============================================================================
2575 #ifndef PRODUCT
2576 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2577 {
2578 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2579 int reg = ra_->get_reg_first(this);
2580 st->print("leaq %s, [rsp + #%d]\t# box lock",
2581 Matcher::regName[reg], offset);
2582 }
2583 #endif
2584
2585 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2586 {
2587 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2588 int reg = ra_->get_encode(this);
2589
2590 __ lea(as_Register(reg), Address(rsp, offset));
2591 }
2592
2593 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2594 {
2595 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2596 if (ra_->get_encode(this) > 15) {
2597 return (offset < 0x80) ? 6 : 9; // REX2
2598 } else {
2599 return (offset < 0x80) ? 5 : 8; // REX
2600 }
2601 }
2602
2603 //=============================================================================
2604 #ifndef PRODUCT
2605 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2606 {
2607 if (UseCompressedClassPointers) {
2608 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2609 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2610 } else {
2611 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2612 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2613 }
2614 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2615 }
2616 #endif
2617
2618 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2619 {
2620 __ ic_check(InteriorEntryAlignment);
2621 }
2622
2623 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2624 {
2625 return MachNode::size(ra_); // too many variables; just compute it
2626 // the hard way
2627 }
2628
2629
2630 //=============================================================================
2631
2632 bool Matcher::supports_vector_calling_convention(void) {
2633 return EnableVectorSupport;
2634 }
2635
2636 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2637 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2638 }
2639
2640 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2641 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2642 }
2643
2644 #ifdef ASSERT
2645 static bool is_ndd_demotable(const MachNode* mdef) {
2646 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2647 }
2648 #endif
2649
2650 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2651 int oper_index) {
2652 if (mdef == nullptr) {
2653 return false;
2654 }
2655
2656 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2657 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2658 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2659 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2660 return false;
2661 }
2662
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any single address
  // component will not result in NDD demotion by the assembler.
2666 if (mdef->operand_num_edges(oper_index) != 1) {
2667 return false;
2668 }
2669
2670 // Demotion candidate must be register mask compatible with definition.
2671 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2672 if (!oper_mask.overlap(mdef->out_RegMask())) {
2673 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2674 return false;
2675 }
2676
2677 switch (oper_index) {
    // The first operand of a MachNode corresponding to an Intel APX NDD
    // selection pattern can share its assigned register with the definition
    // operand if their live ranges do not overlap. In such a scenario the
    // assembler can demote the instruction to a legacy map0/map1 encoding,
    // replacing its 4-byte extended EVEX prefix with a shorter REX/REX2
    // prefix. Demotion candidates are decorated with a special flag by the
    // instruction selector.
2684 case 1:
2685 return is_ndd_demotable_opr1(mdef);
2686
    // The definition operand of a commutative operation can likewise be
    // biased towards the second operand.
2689 case 2:
2690 return is_ndd_demotable_opr2(mdef);
2691
    // The current scheme selects at most two biasing candidates.
2693 default:
2694 assert(false, "unhandled operand index: %s", mdef->Name());
2695 break;
2696 }
2697
2698 return false;
2699 }
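// Illustrative NDD demotion (a sketch; exact mnemonics are assembler-dependent):
//   EVEX NDD form:  addl r18, r17, r16   ; 3-operand, 4-byte extended EVEX prefix
//   after the allocator biases the def onto the first source operand:
//   legacy form:    addl r17, r16        ; 2-operand, shorter REX2/REX encoding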
2700
2701 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2702 assert(EnableVectorSupport, "sanity");
2703 int lo = XMM0_num;
2704 int hi = XMM0b_num;
2705 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2706 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2707 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2708 return OptoRegPair(hi, lo);
2709 }
2710
2711 // Is this branch offset short enough that a short branch can be used?
2712 //
2713 // NOTE: If the platform does not provide any short branch variants, then
2714 // this method should return false for offset 0.
2715 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2719 offset -= br_size;
2720
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
2723 if (rule == jmpConUCF2_rule)
2724 return (-126 <= offset && offset <= 125);
2725 return (-128 <= offset && offset <= 127);
2726 }
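// Worked example (illustrative): a 2-byte branch at offset 100 targeting
// offset 80 is passed offset = 80 - 100 = -20; relative to the next
// instruction this becomes -20 - 2 = -22, which fits in a signed rel8.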
2727
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub. Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers will not be
// available to the callee.
2733 bool Matcher::can_be_java_arg(int reg)
2734 {
2735 return
2736 reg == RDI_num || reg == RDI_H_num ||
2737 reg == RSI_num || reg == RSI_H_num ||
2738 reg == RDX_num || reg == RDX_H_num ||
2739 reg == RCX_num || reg == RCX_H_num ||
2740 reg == R8_num || reg == R8_H_num ||
2741 reg == R9_num || reg == R9_H_num ||
2742 reg == R12_num || reg == R12_H_num ||
2743 reg == XMM0_num || reg == XMM0b_num ||
2744 reg == XMM1_num || reg == XMM1b_num ||
2745 reg == XMM2_num || reg == XMM2b_num ||
2746 reg == XMM3_num || reg == XMM3b_num ||
2747 reg == XMM4_num || reg == XMM4b_num ||
2748 reg == XMM5_num || reg == XMM5b_num ||
2749 reg == XMM6_num || reg == XMM6b_num ||
2750 reg == XMM7_num || reg == XMM7b_num;
2751 }
2752
2753 bool Matcher::is_spillable_arg(int reg)
2754 {
2755 return can_be_java_arg(reg);
2756 }
2757
2758 uint Matcher::int_pressure_limit()
2759 {
2760 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2761 }
2762
2763 uint Matcher::float_pressure_limit()
2764 {
  // After experimenting with different values, the following default threshold
  // was found to work best for LCM's register pressure scheduling on x64.
2767 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2768 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2769 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2770 }
2771
2772 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that multiplies by a magic constant when the
  // divisor is constant is faster than the hardware DIV instruction
  // (it uses MulHiL).
2776 return false;
2777 }
2778
2779 // Register for DIVI projection of divmodI
2780 const RegMask& Matcher::divI_proj_mask() {
2781 return INT_RAX_REG_mask();
2782 }
2783
2784 // Register for MODI projection of divmodI
2785 const RegMask& Matcher::modI_proj_mask() {
2786 return INT_RDX_REG_mask();
2787 }
2788
2789 // Register for DIVL projection of divmodL
2790 const RegMask& Matcher::divL_proj_mask() {
2791 return LONG_RAX_REG_mask();
2792 }
2793
2794 // Register for MODL projection of divmodL
2795 const RegMask& Matcher::modL_proj_mask() {
2796 return LONG_RDX_REG_mask();
2797 }
2798
2799 %}
2800
2801 source_hpp %{
2802 // Header information of the source block.
2803 // Method declarations/definitions which are used outside
2804 // the ad-scope can conveniently be defined here.
2805 //
2806 // To keep related declarations/definitions/uses close together,
2807 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2808
2809 #include "runtime/vm_version.hpp"
2810
2811 class NativeJump;
2812
2813 class CallStubImpl {
2814
2815 //--------------------------------------------------------------
2816 //---< Used for optimization in Compile::shorten_branches >---
2817 //--------------------------------------------------------------
2818
2819 public:
2820 // Size of call trampoline stub.
2821 static uint size_call_trampoline() {
2822 return 0; // no call trampolines on this platform
2823 }
2824
2825 // number of relocations needed by a call trampoline stub
2826 static uint reloc_call_trampoline() {
2827 return 0; // no call trampolines on this platform
2828 }
2829 };
2830
2831 class HandlerImpl {
2832
2833 public:
2834
2835 static int emit_deopt_handler(C2_MacroAssembler* masm);
2836
2837 static uint size_deopt_handler() {
    // One call (5 bytes: E8 rel32) and one short jmp (2 bytes: EB rel8).
2839 return 7;
2840 }
2841 };
2842
2843 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
2845 case 4: // fall-through
2846 case 8: // fall-through
2847 case 16: return Assembler::AVX_128bit;
2848 case 32: return Assembler::AVX_256bit;
2849 case 64: return Assembler::AVX_512bit;
2850
2851 default: {
2852 ShouldNotReachHere();
2853 return Assembler::AVX_NoVec;
2854 }
2855 }
2856 }
2857
2858 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2859 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2860 }
2861
2862 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2863 uint def_idx = use->operand_index(opnd);
2864 Node* def = use->in(def_idx);
2865 return vector_length_encoding(def);
2866 }
2867
2868 static inline bool is_vector_popcount_predicate(BasicType bt) {
2869 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2870 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2871 }
2872
2873 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2874 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2875 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2876 }
2877
2878 class Node::PD {
2879 public:
2880 enum NodeFlags : uint64_t {
2881 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2882 Flag_sets_carry_flag = Node::_last_flag << 2,
2883 Flag_sets_parity_flag = Node::_last_flag << 3,
2884 Flag_sets_zero_flag = Node::_last_flag << 4,
2885 Flag_sets_overflow_flag = Node::_last_flag << 5,
2886 Flag_sets_sign_flag = Node::_last_flag << 6,
2887 Flag_clears_carry_flag = Node::_last_flag << 7,
2888 Flag_clears_parity_flag = Node::_last_flag << 8,
2889 Flag_clears_zero_flag = Node::_last_flag << 9,
2890 Flag_clears_overflow_flag = Node::_last_flag << 10,
2891 Flag_clears_sign_flag = Node::_last_flag << 11,
2892 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2893 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2894 _last_flag = Flag_ndd_demotable_opr2
2895 };
2896 };
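// These platform-dependent flags extend the shared Node flag word and are
// tested with a mask, as in the is_ndd_demotable_opr* helpers above, e.g.
// (n->flags() & Node::PD::Flag_sets_carry_flag) != 0.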
2897
2898 %} // end source_hpp
2899
2900 source %{
2901
2902 #include "opto/addnode.hpp"
2903 #include "c2_intelJccErratum_x86.hpp"
2904
2905 void PhaseOutput::pd_perform_mach_node_analysis() {
2906 if (VM_Version::has_intel_jcc_erratum()) {
2907 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2908 _buf_sizes._code += extra_padding;
2909 }
2910 }
2911
2912 int MachNode::pd_alignment_required() const {
2913 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2914 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2915 return IntelJccErratum::largest_jcc_size() + 1;
2916 } else {
2917 return 1;
2918 }
2919 }
2920
2921 int MachNode::compute_padding(int current_offset) const {
2922 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2923 Compile* C = Compile::current();
2924 PhaseOutput* output = C->output();
2925 Block* block = output->block();
2926 int index = output->index();
2927 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2928 } else {
2929 return 0;
2930 }
2931 }
2932
2933 // Emit deopt handler code.
2934 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2935
2936 // Note that the code buffer's insts_mark is always relative to insts.
2937 // That's why we must use the macroassembler to generate a handler.
2938 address base = __ start_a_stub(size_deopt_handler());
2939 if (base == nullptr) {
2940 ciEnv::current()->record_failure("CodeCache is full");
2941 return 0; // CodeBuffer::expand failed
2942 }
2943 int offset = __ offset();
2944
2945 Label start;
2946 __ bind(start);
2947
2948 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2949
2950 int entry_offset = __ offset();
2951
2952 __ jmp(start);
2953
2954 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2955 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2956 "out of bounds read in post-call NOP check");
2957 __ end_a_stub();
2958 return entry_offset;
2959 }
2960
2961 static Assembler::Width widthForType(BasicType bt) {
2962 if (bt == T_BYTE) {
2963 return Assembler::B;
2964 } else if (bt == T_SHORT) {
2965 return Assembler::W;
2966 } else if (bt == T_INT) {
2967 return Assembler::D;
2968 } else {
2969 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2970 return Assembler::Q;
2971 }
2972 }
2973
2974 //=============================================================================
2975
2976 // Float masks come from different places depending on platform.
2977 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2978 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2979 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2980 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2981 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2982 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2983 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2984 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2985 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2986 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2987 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2988 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2989 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2990 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2991 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2992 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2993 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2994 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2995 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2996
2997 //=============================================================================
2998 bool Matcher::match_rule_supported(int opcode) {
2999 if (!has_match_rule(opcode)) {
3000 return false; // no match rule present
3001 }
3002 switch (opcode) {
3003 case Op_AbsVL:
3004 case Op_StoreVectorScatter:
3005 if (UseAVX < 3) {
3006 return false;
3007 }
3008 break;
3009 case Op_PopCountI:
3010 case Op_PopCountL:
3011 if (!UsePopCountInstruction) {
3012 return false;
3013 }
3014 break;
3015 case Op_PopCountVI:
3016 if (UseAVX < 2) {
3017 return false;
3018 }
3019 break;
3020 case Op_CompressV:
3021 case Op_ExpandV:
3022 case Op_PopCountVL:
3023 if (UseAVX < 2) {
3024 return false;
3025 }
3026 break;
3027 case Op_MulVI:
3028 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3029 return false;
3030 }
3031 break;
3032 case Op_MulVL:
3033 if (UseSSE < 4) { // only with SSE4_1 or AVX
3034 return false;
3035 }
3036 break;
3037 case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
3039 return false;
3040 }
3041 break;
3042 case Op_AbsVB:
3043 case Op_AbsVS:
3044 case Op_AbsVI:
3045 case Op_AddReductionVI:
3046 case Op_AndReductionV:
3047 case Op_OrReductionV:
3048 case Op_XorReductionV:
3049 if (UseSSE < 3) { // requires at least SSSE3
3050 return false;
3051 }
3052 break;
3053 case Op_MaxHF:
3054 case Op_MinHF:
3055 if (!VM_Version::supports_avx512vlbw()) {
3056 return false;
3057 } // fallthrough
3058 case Op_AddHF:
3059 case Op_DivHF:
3060 case Op_FmaHF:
3061 case Op_MulHF:
3062 case Op_ReinterpretS2HF:
3063 case Op_ReinterpretHF2S:
3064 case Op_SubHF:
3065 case Op_SqrtHF:
3066 if (!VM_Version::supports_avx512_fp16()) {
3067 return false;
3068 }
3069 break;
3070 case Op_VectorLoadShuffle:
3071 case Op_VectorRearrange:
3072 case Op_MulReductionVI:
3073 if (UseSSE < 4) { // requires at least SSE4
3074 return false;
3075 }
3076 break;
3077 case Op_IsInfiniteF:
3078 case Op_IsInfiniteD:
3079 if (!VM_Version::supports_avx512dq()) {
3080 return false;
3081 }
3082 break;
3083 case Op_SqrtVD:
3084 case Op_SqrtVF:
3085 case Op_VectorMaskCmp:
3086 case Op_VectorCastB2X:
3087 case Op_VectorCastS2X:
3088 case Op_VectorCastI2X:
3089 case Op_VectorCastL2X:
3090 case Op_VectorCastF2X:
3091 case Op_VectorCastD2X:
3092 case Op_VectorUCastB2X:
3093 case Op_VectorUCastS2X:
3094 case Op_VectorUCastI2X:
3095 case Op_VectorMaskCast:
3096 if (UseAVX < 1) { // enabled for AVX only
3097 return false;
3098 }
3099 break;
3100 case Op_PopulateIndex:
3101 if (UseAVX < 2) {
3102 return false;
3103 }
3104 break;
3105 case Op_RoundVF:
3106 if (UseAVX < 2) { // enabled for AVX2 only
3107 return false;
3108 }
3109 break;
3110 case Op_RoundVD:
3111 if (UseAVX < 3) {
3112 return false; // enabled for AVX3 only
3113 }
3114 break;
3115 case Op_CompareAndSwapL:
3116 case Op_CompareAndSwapP:
3117 break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
3128 case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
3130 return false;
3131 }
3132 break;
3133 case Op_MulVB:
3134 case Op_LShiftVB:
3135 case Op_RShiftVB:
3136 case Op_URShiftVB:
3137 case Op_VectorInsert:
3138 case Op_VectorLoadMask:
3139 case Op_VectorStoreMask:
3140 case Op_VectorBlend:
3141 if (UseSSE < 4) {
3142 return false;
3143 }
3144 break;
3145 case Op_MaxD:
3146 case Op_MaxF:
3147 case Op_MinD:
3148 case Op_MinF:
3149 if (UseAVX < 1) { // enabled for AVX only
3150 return false;
3151 }
3152 break;
3153 case Op_CacheWB:
3154 case Op_CacheWBPreSync:
3155 case Op_CacheWBPostSync:
3156 if (!VM_Version::supports_data_cache_line_flush()) {
3157 return false;
3158 }
3159 break;
3160 case Op_ExtractB:
3161 case Op_ExtractL:
3162 case Op_ExtractI:
3163 case Op_RoundDoubleMode:
3164 if (UseSSE < 4) {
3165 return false;
3166 }
3167 break;
3168 case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
3170 return false; // 128bit vroundpd is not available
3171 }
3172 break;
3173 case Op_LoadVectorGather:
3174 case Op_LoadVectorGatherMasked:
3175 if (UseAVX < 2) {
3176 return false;
3177 }
3178 break;
3179 case Op_FmaF:
3180 case Op_FmaD:
3181 case Op_FmaVD:
3182 case Op_FmaVF:
3183 if (!UseFMA) {
3184 return false;
3185 }
3186 break;
3187 case Op_MacroLogicV:
3188 if (UseAVX < 3 || !UseVectorMacroLogic) {
3189 return false;
3190 }
3191 break;
3192
3193 case Op_VectorCmpMasked:
3194 case Op_VectorMaskGen:
3195 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3196 return false;
3197 }
3198 break;
3199 case Op_VectorMaskFirstTrue:
3200 case Op_VectorMaskLastTrue:
3201 case Op_VectorMaskTrueCount:
3202 case Op_VectorMaskToLong:
3203 if (UseAVX < 1) {
3204 return false;
3205 }
3206 break;
3207 case Op_RoundF:
3208 case Op_RoundD:
3209 break;
3210 case Op_CopySignD:
3211 case Op_CopySignF:
3212 if (UseAVX < 3) {
3213 return false;
3214 }
3215 if (!VM_Version::supports_avx512vl()) {
3216 return false;
3217 }
3218 break;
3219 case Op_CompressBits:
3220 case Op_ExpandBits:
3221 if (!VM_Version::supports_bmi2()) {
3222 return false;
3223 }
3224 break;
3225 case Op_CompressM:
3226 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3227 return false;
3228 }
3229 break;
3230 case Op_ConvF2HF:
3231 case Op_ConvHF2F:
3232 if (!VM_Version::supports_float16()) {
3233 return false;
3234 }
3235 break;
3236 case Op_VectorCastF2HF:
3237 case Op_VectorCastHF2F:
3238 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3239 return false;
3240 }
3241 break;
3242 }
3243 return true; // Match rules are supported by default.
3244 }
3245
3246 //------------------------------------------------------------------------
3247
3248 static inline bool is_pop_count_instr_target(BasicType bt) {
3249 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3250 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3251 }
3252
3253 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3254 return match_rule_supported_vector(opcode, vlen, bt);
3255 }
3256
// Identify extra cases in which we might want to provide match rules for
// vector nodes and other intrinsics, guarded by vector length (vlen) and
// element type (bt).
3259 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3260 if (!match_rule_supported(opcode)) {
3261 return false;
3262 }
3263 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3264 // * SSE2 supports 128bit vectors for all types;
3265 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3266 // * AVX2 supports 256bit vectors for all types;
3267 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3268 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3269 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3270 // And MaxVectorSize is taken into account as well.
3271 if (!vector_size_supported(bt, vlen)) {
3272 return false;
3273 }
3274 // Special cases which require vector length follow:
3275 // * implementation limitations
3276 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3277 // * 128bit vroundpd instruction is present only in AVX1
3278 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
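  // For example, vlen == 8 with bt == T_INT gives 8 * 4 * 8 == 256 bits.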
3279 switch (opcode) {
3280 case Op_MaxVHF:
3281 case Op_MinVHF:
3282 if (!VM_Version::supports_avx512bw()) {
3283 return false;
      }
      // fallthrough
3285 case Op_AddVHF:
3286 case Op_DivVHF:
3287 case Op_FmaVHF:
3288 case Op_MulVHF:
3289 case Op_SubVHF:
3290 case Op_SqrtVHF:
3291 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3292 return false;
3293 }
3294 if (!VM_Version::supports_avx512_fp16()) {
3295 return false;
3296 }
3297 break;
3298 case Op_AbsVF:
3299 case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3301 return false; // 512bit vandps and vxorps are not available
3302 }
3303 break;
3304 case Op_AbsVD:
3305 case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3307 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3308 }
3309 break;
3310 case Op_RotateRightV:
3311 case Op_RotateLeftV:
3312 if (bt != T_INT && bt != T_LONG) {
3313 return false;
3314 } // fallthrough
3315 case Op_MacroLogicV:
3316 if (!VM_Version::supports_evex() ||
3317 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3318 return false;
3319 }
3320 break;
3321 case Op_ClearArray:
3322 case Op_VectorMaskGen:
3323 case Op_VectorCmpMasked:
3324 if (!VM_Version::supports_avx512bw()) {
3325 return false;
3326 }
3327 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3328 return false;
3329 }
3330 break;
3331 case Op_LoadVectorMasked:
3332 case Op_StoreVectorMasked:
3333 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3334 return false;
3335 }
3336 break;
3337 case Op_UMinV:
3338 case Op_UMaxV:
3339 if (UseAVX == 0) {
3340 return false;
3341 }
3342 break;
3343 case Op_MaxV:
3344 case Op_MinV:
3345 if (UseSSE < 4 && is_integral_type(bt)) {
3346 return false;
3347 }
      if (bt == T_FLOAT || bt == T_DOUBLE) {
3349 // Float/Double intrinsics are enabled for AVX family currently.
3350 if (UseAVX == 0) {
3351 return false;
3352 }
3353 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3354 return false;
3355 }
3356 }
3357 break;
3358 case Op_CallLeafVector:
3359 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3360 return false;
3361 }
3362 break;
3363 case Op_AddReductionVI:
3364 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3365 return false;
3366 }
3367 // fallthrough
3368 case Op_AndReductionV:
3369 case Op_OrReductionV:
3370 case Op_XorReductionV:
3371 if (is_subword_type(bt) && (UseSSE < 4)) {
3372 return false;
3373 }
3374 break;
3375 case Op_MinReductionV:
3376 case Op_MaxReductionV:
3377 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3378 return false;
3379 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3380 return false;
3381 }
3382 // Float/Double intrinsics enabled for AVX family.
3383 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3384 return false;
3385 }
3386 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3387 return false;
3388 }
3389 break;
3390 case Op_VectorBlend:
3391 if (UseAVX == 0 && size_in_bits < 128) {
3392 return false;
3393 }
3394 break;
3395 case Op_VectorTest:
3396 if (UseSSE < 4) {
3397 return false; // Implementation limitation
3398 } else if (size_in_bits < 32) {
3399 return false; // Implementation limitation
3400 }
3401 break;
3402 case Op_VectorLoadShuffle:
3403 case Op_VectorRearrange:
      if (vlen == 2) {
3405 return false; // Implementation limitation due to how shuffle is loaded
3406 } else if (size_in_bits == 256 && UseAVX < 2) {
3407 return false; // Implementation limitation
3408 }
3409 break;
3410 case Op_VectorLoadMask:
3411 case Op_VectorMaskCast:
3412 if (size_in_bits == 256 && UseAVX < 2) {
3413 return false; // Implementation limitation
3414 }
3415 // fallthrough
3416 case Op_VectorStoreMask:
3417 if (vlen == 2) {
3418 return false; // Implementation limitation
3419 }
3420 break;
3421 case Op_PopulateIndex:
3422 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3423 return false;
3424 }
3425 break;
3426 case Op_VectorCastB2X:
3427 case Op_VectorCastS2X:
3428 case Op_VectorCastI2X:
3429 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3430 return false;
3431 }
3432 break;
3433 case Op_VectorCastL2X:
3434 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3435 return false;
3436 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3437 return false;
3438 }
3439 break;
3440 case Op_VectorCastF2X: {
3441 // As per JLS section 5.1.3 narrowing conversion to sub-word types
3442 // happen after intermediate conversion to integer and special handling
3443 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
3444 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3445 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3446 return false;
3447 }
3448 }
3449 // fallthrough
3450 case Op_VectorCastD2X:
3451 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3452 return false;
3453 }
3454 break;
3455 case Op_VectorCastF2HF:
3456 case Op_VectorCastHF2F:
3457 if (!VM_Version::supports_f16c() &&
3458 ((!VM_Version::supports_evex() ||
3459 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3460 return false;
3461 }
3462 break;
3463 case Op_RoundVD:
3464 if (!VM_Version::supports_avx512dq()) {
3465 return false;
3466 }
3467 break;
3468 case Op_MulReductionVI:
3469 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3470 return false;
3471 }
3472 break;
3473 case Op_LoadVectorGatherMasked:
3474 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3475 return false;
3476 }
3477 if (is_subword_type(bt) &&
3478 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3479 (size_in_bits < 64) ||
3480 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3481 return false;
3482 }
3483 break;
3484 case Op_StoreVectorScatterMasked:
3485 case Op_StoreVectorScatter:
3486 if (is_subword_type(bt)) {
3487 return false;
3488 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3489 return false;
3490 }
3491 // fallthrough
3492 case Op_LoadVectorGather:
3493 if (!is_subword_type(bt) && size_in_bits == 64) {
3494 return false;
3495 }
3496 if (is_subword_type(bt) && size_in_bits < 64) {
3497 return false;
3498 }
3499 break;
3500 case Op_SaturatingAddV:
3501 case Op_SaturatingSubV:
3502 if (UseAVX < 1) {
3503 return false; // Implementation limitation
3504 }
3505 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3506 return false;
3507 }
3508 break;
3509 case Op_SelectFromTwoVector:
3510 if (size_in_bits < 128) {
3511 return false;
3512 }
3513 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3514 return false;
3515 }
3516 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3517 return false;
3518 }
3519 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3520 return false;
3521 }
3522 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3523 return false;
3524 }
3525 break;
3526 case Op_MaskAll:
3527 if (!VM_Version::supports_evex()) {
3528 return false;
3529 }
3530 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3531 return false;
3532 }
3533 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3534 return false;
3535 }
3536 break;
3537 case Op_VectorMaskCmp:
3538 if (vlen < 2 || size_in_bits < 32) {
3539 return false;
3540 }
3541 break;
3542 case Op_CompressM:
3543 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3544 return false;
3545 }
3546 break;
3547 case Op_CompressV:
3548 case Op_ExpandV:
3549 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3550 return false;
3551 }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
3555 case Op_VectorLongToMask:
3556 if (UseAVX < 1) {
3557 return false;
3558 }
3559 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3560 return false;
3561 }
3562 break;
3563 case Op_SignumVD:
3564 case Op_SignumVF:
3565 if (UseAVX < 1) {
3566 return false;
3567 }
3568 break;
3569 case Op_PopCountVI:
3570 case Op_PopCountVL: {
3571 if (!is_pop_count_instr_target(bt) &&
3572 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3573 return false;
3574 }
3575 }
3576 break;
3577 case Op_ReverseV:
3578 case Op_ReverseBytesV:
3579 if (UseAVX < 2) {
3580 return false;
3581 }
3582 break;
3583 case Op_CountTrailingZerosV:
3584 case Op_CountLeadingZerosV:
3585 if (UseAVX < 2) {
3586 return false;
3587 }
3588 break;
3589 }
  return true; // Match rules are supported by default.
3591 }
3592
3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked
  // operations share the IR node of their non-masked counterpart, with the
  // mask edge being the differentiator. This routine therefore does a strict
  // check on the existence of masked operation patterns: it returns false for
  // every opcode except the ones whose masked instruction patterns are
  // defined in this file.
3600 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3601 return false;
3602 }
3603
3604 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3605 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3606 return false;
3607 }
  switch (opcode) {
3609 // Unary masked operations
3610 case Op_AbsVB:
3611 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      // fallthrough
3615 case Op_AbsVI:
3616 case Op_AbsVL:
3617 return true;
3618
3619 // Ternary masked operations
3620 case Op_FmaVF:
3621 case Op_FmaVD:
3622 return true;
3623
3624 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3626 return false;
3627 }
3628 return true;
3629
3630 // Binary masked operations
3631 case Op_AddVB:
3632 case Op_AddVS:
3633 case Op_SubVB:
3634 case Op_SubVS:
3635 case Op_MulVS:
3636 case Op_LShiftVS:
3637 case Op_RShiftVS:
3638 case Op_URShiftVS:
3639 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3640 if (!VM_Version::supports_avx512bw()) {
3641 return false; // Implementation limitation
3642 }
3643 return true;
3644
3645 case Op_MulVL:
3646 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3647 if (!VM_Version::supports_avx512dq()) {
3648 return false; // Implementation limitation
3649 }
3650 return true;
3651
3652 case Op_AndV:
3653 case Op_OrV:
3654 case Op_XorV:
3655 case Op_RotateRightV:
3656 case Op_RotateLeftV:
3657 if (bt != T_INT && bt != T_LONG) {
3658 return false; // Implementation limitation
3659 }
3660 return true;
3661
3662 case Op_VectorLoadMask:
3663 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3664 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3665 return false;
3666 }
3667 return true;
3668
3669 case Op_AddVI:
3670 case Op_AddVL:
3671 case Op_AddVF:
3672 case Op_AddVD:
3673 case Op_SubVI:
3674 case Op_SubVL:
3675 case Op_SubVF:
3676 case Op_SubVD:
3677 case Op_MulVI:
3678 case Op_MulVF:
3679 case Op_MulVD:
3680 case Op_DivVF:
3681 case Op_DivVD:
3682 case Op_SqrtVF:
3683 case Op_SqrtVD:
3684 case Op_LShiftVI:
3685 case Op_LShiftVL:
3686 case Op_RShiftVI:
3687 case Op_RShiftVL:
3688 case Op_URShiftVI:
3689 case Op_URShiftVL:
3690 case Op_LoadVectorMasked:
3691 case Op_StoreVectorMasked:
3692 case Op_LoadVectorGatherMasked:
3693 case Op_StoreVectorScatterMasked:
3694 return true;
3695
3696 case Op_UMinV:
3697 case Op_UMaxV:
3698 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3699 return false;
3700 } // fallthrough
3701 case Op_MaxV:
3702 case Op_MinV:
3703 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3704 return false; // Implementation limitation
3705 }
3706 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3707 return false; // Implementation limitation
3708 }
3709 return true;
3710 case Op_SaturatingAddV:
3711 case Op_SaturatingSubV:
3712 if (!is_subword_type(bt)) {
3713 return false;
3714 }
3715 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3716 return false; // Implementation limitation
3717 }
3718 return true;
3719
3720 case Op_VectorMaskCmp:
3721 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3722 return false; // Implementation limitation
3723 }
3724 return true;
3725
3726 case Op_VectorRearrange:
3727 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3728 return false; // Implementation limitation
3729 }
3730 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3731 return false; // Implementation limitation
3732 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3733 return false; // Implementation limitation
3734 }
3735 return true;
3736
3737 // Binary Logical operations
3738 case Op_AndVMask:
3739 case Op_OrVMask:
3740 case Op_XorVMask:
3741 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3742 return false; // Implementation limitation
3743 }
3744 return true;
3745
3746 case Op_PopCountVI:
3747 case Op_PopCountVL:
3748 if (!is_pop_count_instr_target(bt)) {
3749 return false;
3750 }
3751 return true;
3752
3753 case Op_MaskAll:
3754 return true;
3755
3756 case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
      // fallthrough
3760 default:
3761 return false;
3762 }
3763 }
3764
3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3766 return false;
3767 }
3768
3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3771 switch (elem_bt) {
3772 case T_BYTE: return false;
3773 case T_SHORT: return !VM_Version::supports_avx512bw();
3774 case T_INT: return !VM_Version::supports_avx();
3775 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3776 default:
3777 ShouldNotReachHere();
3778 return false;
3779 }
3780 }
3781
3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3783 // Prefer predicate if the mask type is "TypeVectMask".
3784 return vt->isa_vectmask() != nullptr;
3785 }
3786
3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3788 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3789 bool legacy = (generic_opnd->opcode() == LEGVEC);
3790 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3791 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3792 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3793 return new legVecZOper();
3794 }
3795 if (legacy) {
3796 switch (ideal_reg) {
3797 case Op_VecS: return new legVecSOper();
3798 case Op_VecD: return new legVecDOper();
3799 case Op_VecX: return new legVecXOper();
3800 case Op_VecY: return new legVecYOper();
3801 case Op_VecZ: return new legVecZOper();
3802 }
3803 } else {
3804 switch (ideal_reg) {
3805 case Op_VecS: return new vecSOper();
3806 case Op_VecD: return new vecDOper();
3807 case Op_VecX: return new vecXOper();
3808 case Op_VecY: return new vecYOper();
3809 case Op_VecZ: return new vecZOper();
3810 }
3811 }
3812 ShouldNotReachHere();
3813 return nullptr;
3814 }
3815
3816 bool Matcher::is_reg2reg_move(MachNode* m) {
3817 switch (m->rule()) {
3818 case MoveVec2Leg_rule:
3819 case MoveLeg2Vec_rule:
3820 case MoveF2VL_rule:
3821 case MoveF2LEG_rule:
3822 case MoveVL2F_rule:
3823 case MoveLEG2F_rule:
3824 case MoveD2VL_rule:
3825 case MoveD2LEG_rule:
3826 case MoveVL2D_rule:
3827 case MoveLEG2D_rule:
3828 return true;
3829 default:
3830 return false;
3831 }
3832 }
3833
3834 bool Matcher::is_generic_vector(MachOper* opnd) {
3835 switch (opnd->opcode()) {
3836 case VEC:
3837 case LEGVEC:
3838 return true;
3839 default:
3840 return false;
3841 }
3842 }
3843
3844 //------------------------------------------------------------------------
3845
3846 const RegMask* Matcher::predicate_reg_mask(void) {
3847 return &_VECTMASK_REG_mask;
3848 }
3849
3850 // Max vector size in bytes. 0 if not supported.
3851 int Matcher::vector_width_in_bytes(BasicType bt) {
3852 assert(is_java_primitive(bt), "only primitive type vectors");
3853 // SSE2 supports 128bit vectors for all types.
3854 // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
3856 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
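  // e.g. UseAVX == 2 -> (1 << 2) * 8 == 32 bytes (256bit),
  //      UseAVX == 3 -> (1 << 3) * 8 == 64 bytes (512bit).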
3857 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3858 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3859 size = (UseAVX > 2) ? 64 : 32;
3860 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3861 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3862 // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
3864 // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
    case T_DOUBLE:
    case T_LONG:
      if (size < 16) return 0;
      break;
    case T_FLOAT:
    case T_INT:
      if (size < 8) return 0;
      break;
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
      if (size < 4) return 0;
      break;
    default:
      ShouldNotReachHere();
  }
3889 return size;
3890 }
3891
3892 // Limits on vector size (number of elements) loaded into vector.
3893 int Matcher::max_vector_size(const BasicType bt) {
3894 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3895 }
3896 int Matcher::min_vector_size(const BasicType bt) {
3897 int max_size = max_vector_size(bt);
  // The minimum size which can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support calling SVML routines on single-element (double64) vectors.
  if (bt == T_DOUBLE) {
    size = 1;
  }
  return MIN2(size, max_size);
3905 }
3906
3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3908 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3909 // by default on Cascade Lake
3910 if (VM_Version::is_default_intel_cascade_lake()) {
3911 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3912 }
3913 return Matcher::max_vector_size(bt);
3914 }
3915
3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3917 return -1;
3918 }
3919
3920 // Vector ideal reg corresponding to specified size in bytes
3921 uint Matcher::vector_ideal_reg(int size) {
3922 assert(MaxVectorSize >= size, "");
3923 switch(size) {
3924 case 4: return Op_VecS;
3925 case 8: return Op_VecD;
3926 case 16: return Op_VecX;
3927 case 32: return Op_VecY;
3928 case 64: return Op_VecZ;
3929 }
3930 ShouldNotReachHere();
3931 return 0;
3932 }
3933
3934 // Check for shift by small constant as well
3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3936 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3937 shift->in(2)->get_int() <= 3 &&
3938 // Are there other uses besides address expressions?
3939 !matcher->is_visited(shift)) {
3940 address_visited.set(shift->_idx); // Flag as address_visited
3941 mstack.push(shift->in(2), Matcher::Visit);
3942 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the ConvI2L
    // operation for an array index on LP64 if the index value is positive.
3946 if (conv->Opcode() == Op_ConvI2L &&
3947 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3948 // Are there other uses besides address expressions?
3949 !matcher->is_visited(conv)) {
3950 address_visited.set(conv->_idx); // Flag as address_visited
3951 mstack.push(conv->in(1), Matcher::Pre_Visit);
3952 } else {
3953 mstack.push(conv, Matcher::Pre_Visit);
3954 }
3955 return true;
3956 }
3957 return false;
3958 }
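// Illustrative shape this enables (a sketch): for an address such as
//   base + (ConvI2L(index) << 2) + disp
// the shift (and a provably positive ConvI2L feeding it) is cloned so the
// matcher can fold the whole expression into one [base + index*4 + disp]
// operand instead of materializing the scaled index in a register.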
3959
// This function identifies sub-graphs in which a 'load' node is an input
// to two different nodes, such that the sub-graph can be matched with BMI
// instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
3966 //
3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3968 // This is a temporary solution until we make DAGs expressible in ADL.
3969 template<typename ConType>
3970 class FusedPatternMatcher {
3971 Node* _op1_node;
3972 Node* _mop_node;
3973 int _con_op;
3974
3975 static int match_next(Node* n, int next_op, int next_op_idx) {
3976 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3977 return -1;
3978 }
3979
3980 if (next_op_idx == -1) { // n is commutative, try rotations
3981 if (n->in(1)->Opcode() == next_op) {
3982 return 1;
3983 } else if (n->in(2)->Opcode() == next_op) {
3984 return 2;
3985 }
3986 } else {
3987 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3988 if (n->in(next_op_idx)->Opcode() == next_op) {
3989 return next_op_idx;
3990 }
3991 }
3992 return -1;
3993 }
3994
3995 public:
3996 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3997 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3998
3999 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4000 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4001 typename ConType::NativeType con_value) {
4002 if (_op1_node->Opcode() != op1) {
4003 return false;
4004 }
4005 if (_mop_node->outcnt() > 2) {
4006 return false;
4007 }
4008 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4009 if (op1_op2_idx == -1) {
4010 return false;
4011 }
4012 // Memory operation must be the other edge
4013 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4014
4015 // Check that the mop node is really what we want
4016 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4017 Node* op2_node = _op1_node->in(op1_op2_idx);
4018 if (op2_node->outcnt() > 1) {
4019 return false;
4020 }
4021 assert(op2_node->Opcode() == op2, "Should be");
4022 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4023 if (op2_con_idx == -1) {
4024 return false;
4025 }
4026 // Memory operation must be the other edge
4027 int op2_mop_idx = (op2_con_idx & 1) + 1;
4028 // Check that the memory operation is the same node
4029 if (op2_node->in(op2_mop_idx) == _mop_node) {
4030 // Now check the constant
4031 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4032 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4033 return true;
4034 }
4035 }
4036 }
4037 return false;
4038 }
4039 };
4040
4041 static bool is_bmi_pattern(Node* n, Node* m) {
4042 assert(UseBMI1Instructions, "sanity");
4043 if (n != nullptr && m != nullptr) {
4044 if (m->Opcode() == Op_LoadI) {
4045 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4046 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4047 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4048 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4049 } else if (m->Opcode() == Op_LoadL) {
4050 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4051 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4052 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4053 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4054 }
4055 }
4056 return false;
4057 }
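// A hedged reading of the three shapes matched above:
//   AndI/AndL (SubI/SubL 0 x) x  -> blsi   (x & -x,      isolate lowest set bit)
//   AndI/AndL (AddI/AddL x -1) x -> blsr   (x & (x - 1), clear lowest set bit)
//   XorI/XorL (AddI/AddL x -1) x -> blsmsk (x ^ (x - 1), mask up to lowest set bit)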
4058
4059 // Should the matcher clone input 'm' of node 'n'?
4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4061 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4062 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4063 mstack.push(m, Visit);
4064 return true;
4065 }
4066 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4067 mstack.push(m, Visit); // m = ShiftCntV
4068 return true;
4069 }
4070 if (is_encode_and_store_pattern(n, m)) {
4071 mstack.push(m, Visit);
4072 return true;
4073 }
4074 return false;
4075 }
4076
4077 // Should the Matcher clone shifts on addressing modes, expecting them
4078 // to be subsumed into complex addressing expressions or compute them
4079 // into registers?
4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4081 Node *off = m->in(AddPNode::Offset);
4082 if (off->is_Con()) {
4083 address_visited.test_set(m->_idx); // Flag as address_visited
4084 Node *adr = m->in(AddPNode::Address);
4085
4086 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4087 // AtomicAdd is not an addressing expression.
4088 // Cheap to find it by looking for screwy base.
4089 if (adr->is_AddP() &&
4090 !adr->in(AddPNode::Base)->is_top() &&
4091 !adr->in(AddPNode::Offset)->is_Con() &&
4092 off->get_long() == (int) (off->get_long()) && // immL32
4093 // Are there other uses besides address expressions?
4094 !is_visited(adr)) {
4095 address_visited.set(adr->_idx); // Flag as address_visited
4096 Node *shift = adr->in(AddPNode::Offset);
4097 if (!clone_shift(shift, this, mstack, address_visited)) {
4098 mstack.push(shift, Pre_Visit);
4099 }
4100 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4101 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4102 } else {
4103 mstack.push(adr, Pre_Visit);
4104 }
4105
4106 // Clone X+offset as it also folds into most addressing expressions
4107 mstack.push(off, Visit);
4108 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4109 return true;
4110 } else if (clone_shift(off, this, mstack, address_visited)) {
4111 address_visited.test_set(m->_idx); // Flag as address_visited
4112 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4113 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4114 return true;
4115 }
4116 return false;
4117 }
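// Illustrative example (a sketch): for p = base + (i << 3) + 16, the outer
// AddP carries the constant offset while the inner AddP carries the shifted
// index, so the whole expression folds into a single [base + i*8 + 16]
// addressing-mode operand, using both adds with an immediate offset.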
4118
4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4120 switch (bt) {
4121 case BoolTest::eq:
4122 return Assembler::eq;
4123 case BoolTest::ne:
4124 return Assembler::neq;
4125 case BoolTest::le:
4126 case BoolTest::ule:
4127 return Assembler::le;
4128 case BoolTest::ge:
4129 case BoolTest::uge:
4130 return Assembler::nlt;
4131 case BoolTest::lt:
4132 case BoolTest::ult:
4133 return Assembler::lt;
4134 case BoolTest::gt:
4135 case BoolTest::ugt:
4136 return Assembler::nle;
    default: ShouldNotReachHere(); return Assembler::_false;
4138 }
4139 }
4140
4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4142 switch (bt) {
4143 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4144 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4145 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4146 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4147 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4148 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4149 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4150 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4151 }
4152 }
4153
4154 // Helper methods for MachSpillCopyNode::implementation().
4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4156 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4157 assert(ireg == Op_VecS || // 32bit vector
4158 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4159 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4160 "no non-adjacent vector moves" );
4161 if (masm) {
4162 switch (ireg) {
4163 case Op_VecS: // copy whole register
4164 case Op_VecD:
4165 case Op_VecX:
4166 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4167 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4168 } else {
4169 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4170 }
4171 break;
4172 case Op_VecY:
4173 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4174 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4175 } else {
4176 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4177 }
4178 break;
4179 case Op_VecZ:
4180 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4181 break;
4182 default:
4183 ShouldNotReachHere();
4184 }
4185 #ifndef PRODUCT
4186 } else {
4187 switch (ireg) {
4188 case Op_VecS:
4189 case Op_VecD:
4190 case Op_VecX:
4191 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4192 break;
4193 case Op_VecY:
4194 case Op_VecZ:
4195 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4196 break;
4197 default:
4198 ShouldNotReachHere();
4199 }
4200 #endif
4201 }
4202 }
4203
4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4205 int stack_offset, int reg, uint ireg, outputStream* st) {
4206 if (masm) {
4207 if (is_load) {
4208 switch (ireg) {
4209 case Op_VecS:
4210 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4211 break;
4212 case Op_VecD:
4213 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4214 break;
4215 case Op_VecX:
4216 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4217 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4218 } else {
4219 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4221 }
4222 break;
4223 case Op_VecY:
4224 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4225 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4226 } else {
4227 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
4229 }
4230 break;
4231 case Op_VecZ:
4232 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4233 break;
4234 default:
4235 ShouldNotReachHere();
4236 }
4237 } else { // store
4238 switch (ireg) {
4239 case Op_VecS:
4240 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4241 break;
4242 case Op_VecD:
4243 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4244 break;
4245 case Op_VecX:
4246 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4247 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4250 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4251 }
4252 break;
4253 case Op_VecY:
4254 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4255 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
4258 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4259 }
4260 break;
4261 case Op_VecZ:
4262 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4263 break;
4264 default:
4265 ShouldNotReachHere();
4266 }
4267 }
4268 #ifndef PRODUCT
4269 } else {
4270 if (is_load) {
4271 switch (ireg) {
4272 case Op_VecS:
4273 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4274 break;
4275 case Op_VecD:
4276 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4277 break;
4278 case Op_VecX:
4279 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4280 break;
4281 case Op_VecY:
4282 case Op_VecZ:
4283 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4284 break;
4285 default:
4286 ShouldNotReachHere();
4287 }
4288 } else { // store
4289 switch (ireg) {
4290 case Op_VecS:
4291 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4292 break;
4293 case Op_VecD:
4294 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4295 break;
4296 case Op_VecX:
4297 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4298 break;
4299 case Op_VecY:
4300 case Op_VecZ:
4301 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4302 break;
4303 default:
4304 ShouldNotReachHere();
4305 }
4306 }
4307 #endif
4308 }
4309 }
4310
4311 template <class T>
4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4313 int size = type2aelembytes(bt) * len;
4314 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4315 for (int i = 0; i < len; i++) {
4316 int offset = i * type2aelembytes(bt);
4317 switch (bt) {
4318 case T_BYTE: val->at(i) = con; break;
4319 case T_SHORT: {
4320 jshort c = con;
4321 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4322 break;
4323 }
4324 case T_INT: {
4325 jint c = con;
4326 memcpy(val->adr_at(offset), &c, sizeof(jint));
4327 break;
4328 }
4329 case T_LONG: {
4330 jlong c = con;
4331 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4332 break;
4333 }
4334 case T_FLOAT: {
4335 jfloat c = con;
4336 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4337 break;
4338 }
4339 case T_DOUBLE: {
4340 jdouble c = con;
4341 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4342 break;
4343 }
4344 default: assert(false, "%s", type2name(bt));
4345 }
4346 }
4347 return val;
4348 }
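// For example (illustrative, little-endian): vreplicate_imm(T_INT, 0x01020304, 2)
// yields the 8 bytes 04 03 02 01 04 03 02 01.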
4349
4350 static inline jlong high_bit_set(BasicType bt) {
4351 switch (bt) {
4352 case T_BYTE: return 0x8080808080808080;
4353 case T_SHORT: return 0x8000800080008000;
4354 case T_INT: return 0x8000000080000000;
4355 case T_LONG: return 0x8000000000000000;
4356 default:
4357 ShouldNotReachHere();
4358 return 0;
4359 }
4360 }
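// The returned 64-bit pattern has the sign bit of every lane of the given
// element width set, e.g. each 16-bit lane contributes 0x8000 for T_SHORT.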
4361
4362 #ifndef PRODUCT
4363 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4364 st->print("nop \t# %d bytes pad for loops and calls", _count);
4365 }
4366 #endif
4367
4368 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4369 __ nop(_count);
4370 }
4371
4372 uint MachNopNode::size(PhaseRegAlloc*) const {
4373 return _count;
4374 }
4375
4376 #ifndef PRODUCT
4377 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4378 st->print("# breakpoint");
4379 }
4380 #endif
4381
4382 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4383 __ int3();
4384 }
4385
4386 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4387 return MachNode::size(ra_);
4388 }
4389
4390 %}
4391
4392 //----------ENCODING BLOCK-----------------------------------------------------
4393 // This block specifies the encoding classes used by the compiler to
4394 // output byte streams. Encoding classes are parameterized macros
4395 // used by Machine Instruction Nodes in order to generate the bit
4396 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4400 // which returns its register number when queried. CONST_INTER causes
4401 // an operand to generate a function which returns the value of the
4402 // constant when queried. MEMORY_INTER causes an operand to generate
4403 // four functions which return the Base Register, the Index Register,
4404 // the Scale Value, and the Offset Value of the operand when queried.
4405 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
4407 // associated with each basic boolean condition for a conditional
4408 // instruction.
4409 //
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the opcode keyword to specify, in
// order, their primary, secondary, and tertiary opcode. Only the
// opcode sections which a particular instruction needs for encoding
// need to be specified. A function is also available to check whether
// a constant displacement is an oop.
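// For illustration only (a sketch, not necessarily the exact rule this file
// defines): an instruct consuming an encoding class from the block below
// might look like
//
//   instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
//                      rFlagsReg cr) %{
//     match(Set rax (DivI rax div));
//     effect(KILL rdx, KILL cr);
//     ins_encode(cdql_enc(div));   // cdql_enc is defined below
//   %}
//
// Inside an enc_class body, the opcode sections are referenced as $primary,
// $secondary, and $tertiary.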
4418 encode %{
4419 enc_class cdql_enc(no_rax_rdx_RegI div)
4420 %{
4421 // Full implementation of Java idiv and irem; checks for
4422 // special case as described in JVM spec., p.243 & p.271.
4423 //
4424 // normal case special case
4425 //
4426 // input : rax: dividend min_int
4427 // reg: divisor -1
4428 //
4429 // output: rax: quotient (= rax idiv reg) min_int
4430 // rdx: remainder (= rax irem reg) 0
4431 //
// Code sequence:
4433 //
4434 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4435 // 5: 75 07/08 jne e <normal>
4436 // 7: 33 d2 xor %edx,%edx
4437 // [div >= 8 -> offset + 1]
4438 // [REX_B]
4439 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4440 // c: 74 03/04 je 11 <done>
4441 // 000000000000000e <normal>:
4442 // e: 99 cltd
4443 // [div >= 8 -> offset + 1]
4444 // [REX_B]
4445 // f: f7 f9 idiv $div
4446 // 0000000000000011 <done>:
4447 Label normal;
4448 Label done;
4449
4450 // cmp $0x80000000,%eax
4451 __ cmpl(as_Register(RAX_enc), 0x80000000);
4452
4453 // jne e <normal>
4454 __ jccb(Assembler::notEqual, normal);
4455
4456 // xor %edx,%edx
4457 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4458
// cmp $0xffffffffffffffff,$div
4460 __ cmpl($div$$Register, -1);
4461
4462 // je 11 <done>
4463 __ jccb(Assembler::equal, done);
4464
4465 // <normal>
4466 // cltd
4467 __ bind(normal);
4468 __ cdql();
4469
4470 // idivl
4471 // <done>
4472 __ idivl($div$$Register);
4473 __ bind(done);
4474 %}
4475
4476 enc_class cdqq_enc(no_rax_rdx_RegL div)
4477 %{
4478 // Full implementation of Java ldiv and lrem; checks for
4479 // special case as described in JVM spec., p.243 & p.271.
4480 //
4481 // normal case special case
4482 //
4483 // input : rax: dividend min_long
4484 // reg: divisor -1
4485 //
4486 // output: rax: quotient (= rax idiv reg) min_long
4487 // rdx: remainder (= rax irem reg) 0
4488 //
// Code sequence:
4490 //
4491 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4492 // 7: 00 00 80
4493 // a: 48 39 d0 cmp %rdx,%rax
4494 // d: 75 08 jne 17 <normal>
4495 // f: 33 d2 xor %edx,%edx
4496 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4497 // 15: 74 05 je 1c <done>
4498 // 0000000000000017 <normal>:
4499 // 17: 48 99 cqto
4500 // 19: 48 f7 f9 idiv $div
4501 // 000000000000001c <done>:
4502 Label normal;
4503 Label done;
4504
4505 // mov $0x8000000000000000,%rdx
4506 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4507
4508 // cmp %rdx,%rax
4509 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4510
4511 // jne 17 <normal>
4512 __ jccb(Assembler::notEqual, normal);
4513
4514 // xor %edx,%edx
4515 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4516
4517 // cmp $0xffffffffffffffff,$div
4518 __ cmpq($div$$Register, -1);
4519
// je 1c <done>
4521 __ jccb(Assembler::equal, done);
4522
4523 // <normal>
4524 // cqto
4525 __ bind(normal);
4526 __ cdqq();
4527
// idivq
4529 // <done>
4530 __ idivq($div$$Register);
4531 __ bind(done);
4532 %}
4533
4534 enc_class clear_avx %{
4535 DEBUG_ONLY(int off0 = __ offset());
4536 if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when the current compiled code uses
// wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
4540 __ vzeroupper();
4541 }
4542 DEBUG_ONLY(int off1 = __ offset());
4543 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4544 %}
4545
4546 enc_class Java_To_Runtime(method meth) %{
4547 __ lea(r10, RuntimeAddress((address)$meth$$method));
4548 __ call(r10);
4549 __ post_call_nop();
4550 %}
4551
4552 enc_class Java_Static_Call(method meth)
4553 %{
4554 // JAVA STATIC CALL
4555 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4556 // determine who we intended to call.
4557 if (!_method) {
4558 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4559 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4560 // The NOP here is purely to ensure that eliding a call to
4561 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4562 __ addr_nop_5();
4563 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4564 } else {
4565 int method_index = resolved_method_index(masm);
4566 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4567 : static_call_Relocation::spec(method_index);
4568 address mark = __ pc();
4569 int call_offset = __ offset();
4570 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4571 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4572 // Calls of the same statically bound method can share
4573 // a stub to the interpreter.
4574 __ code()->shared_stub_to_interp_for(_method, call_offset);
4575 } else {
4576 // Emit stubs for static call.
4577 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4578 __ clear_inst_mark();
4579 if (stub == nullptr) {
4580 ciEnv::current()->record_failure("CodeCache is full");
4581 return;
4582 }
4583 }
4584 }
4585 __ post_call_nop();
4586 %}
4587
4588 enc_class Java_Dynamic_Call(method meth) %{
4589 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4590 __ post_call_nop();
4591 %}
4592
4593 enc_class call_epilog %{
4594 if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find the magic cookie on the stack
4596 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4597 Label L;
4598 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4599 __ jccb(Assembler::equal, L);
4600 // Die if stack mismatch
4601 __ int3();
4602 __ bind(L);
4603 }
4604 %}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
4611 // S T A C K L A Y O U T Allocators stack-slot number
4612 // | (to get allocators register number
4613 // G Owned by | | v add OptoReg::stack0())
4614 // r CALLER | |
4615 // o | +--------+ pad to even-align allocators stack-slot
4616 // w V | pad0 | numbers; owned by CALLER
4617 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4618 // h ^ | in | 5
4619 // | | args | 4 Holes in incoming args owned by SELF
4620 // | | | | 3
4621 // | | +--------+
4622 // V | | old out| Empty on Intel, window on Sparc
4623 // | old |preserve| Must be even aligned.
4624 // | SP-+--------+----> Matcher::_old_SP, even aligned
4625 // | | in | 3 area for Intel ret address
4626 // Owned by |preserve| Empty on Sparc.
4627 // SELF +--------+
4628 // | | pad2 | 2 pad to align old SP
4629 // | +--------+ 1
4630 // | | locks | 0
4631 // | +--------+----> OptoReg::stack0(), even aligned
4632 // | | pad1 | 11 pad to align new SP
4633 // | +--------+
4634 // | | | 10
4635 // | | spills | 9 spills
4636 // V | | 8 (pad0 slot for callee)
4637 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4638 // ^ | out | 7
4639 // | | args | 6 Holes in outgoing args owned by CALLEE
4640 // Owned by +--------+
4641 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4642 // | new |preserve| Must be even-aligned.
4643 // | SP-+--------+----> Matcher::_new_SP, even aligned
4644 // | | |
4645 //
4646 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4647 // known from SELF's arguments and the Java calling convention.
4648 // Region 6-7 is determined per call site.
4649 // Note 2: If the calling convention leaves holes in the incoming argument
4650 // area, those holes are owned by SELF. Holes in the outgoing area
4651 // are owned by the CALLEE. Holes should not be necessary in the
4652 // incoming area, as the Java calling convention is completely under
4653 // the control of the AD file. Doubles can be sorted and packed to
4654 // avoid holes. Holes in the outgoing arguments may be necessary for
4655 // varargs C calling conventions.
4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4657 // even aligned with pad0 as needed.
4658 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4659 // region 6-11 is even aligned; it may be padded out more so that
4660 // the region from SP to FP meets the minimum stack alignment.
4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4662 // alignment. Region 11, pad1, may be dynamically extended so that
4663 // SP meets the minimum alignment.
4664
4665 frame
4666 %{
4667 // These three registers define part of the calling convention
4668 // between compiled code and the interpreter.
4669 inline_cache_reg(RAX); // Inline Cache Register
4670
4671 // Optional: name the operand used by cisc-spilling to access
4672 // [stack_pointer + offset]
4673 cisc_spilling_operand_name(indOffset32);
4674
4675 // Number of stack slots consumed by locking an object
4676 sync_stack_slots(2);
4677
4678 // Compiled code's Frame Pointer
4679 frame_pointer(RSP);
4680
// The interpreter stores its frame pointer in a register which is
// saved to the stack by I2C adapters.
// I2C adapters convert from interpreted Java to compiled Java.
4684 interpreter_frame_pointer(RBP);
4685
4686 // Stack alignment requirement
4687 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4688
4689 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4690 // for calls to C. Supports the var-args backing area for register parms.
4691 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
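// For example (platform-dependent): on Windows x64 the ABI reserves a
// 32-byte register-parameter save ("shadow") area, so this evaluates to
// 8 slots there; on System V targets the area is empty and this is 0.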
4692
4693 // The after-PROLOG location of the return address. Location of
4694 // return address specifies a type (REG or STACK) and a number
4695 // representing the register number (i.e. - use a register name) or
4696 // stack slot.
// Ret Addr is on the stack in slot 0 if there are no locks, verification
// slots, or alignment padding. Otherwise, it is above the locks, the
// verification slot, and the alignment word.
4699 return_addr(STACK - 2 +
4700 align_up((Compile::current()->in_preserve_stack_slots() +
4701 Compile::current()->fixed_slots()),
4702 stack_alignment_in_slots()));
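// Worked example (illustrative numbers only): with 2 in-preserve slots, no
// fixed slots, and a stack alignment of 4 slots, the expression above is
// STACK - 2 + align_up(2 + 0, 4) = STACK + 2.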
4703
4704 // Location of compiled Java return values. Same as C for now.
4705 return_value
4706 %{
4707 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4708 "only return normal values");
4709
4710 static const int lo[Op_RegL + 1] = {
4711 0,
4712 0,
4713 RAX_num, // Op_RegN
4714 RAX_num, // Op_RegI
4715 RAX_num, // Op_RegP
4716 XMM0_num, // Op_RegF
4717 XMM0_num, // Op_RegD
4718 RAX_num // Op_RegL
4719 };
4720 static const int hi[Op_RegL + 1] = {
4721 0,
4722 0,
4723 OptoReg::Bad, // Op_RegN
4724 OptoReg::Bad, // Op_RegI
4725 RAX_H_num, // Op_RegP
4726 OptoReg::Bad, // Op_RegF
4727 XMM0b_num, // Op_RegD
4728 RAX_H_num // Op_RegL
4729 };
4730 // Excluded flags and vector registers.
4731 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4732 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4733 %}
4734 %}
4735
4736 //----------ATTRIBUTES---------------------------------------------------------
4737 //----------Operand Attributes-------------------------------------------------
4738 op_attrib op_cost(0); // Required cost attribute
4739
4740 //----------Instruction Attributes---------------------------------------------
4741 ins_attrib ins_cost(100); // Required cost attribute
4742 ins_attrib ins_size(8); // Required size attribute (in bits)
4743 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4744 // a non-matching short branch variant
4745 // of some long branch?
4746 ins_attrib ins_alignment(1); // Required alignment attribute (must
4747 // be a power of 2) specifies the
4748 // alignment that some part of the
4749 // instruction (not necessarily the
4750 // start) requires. If > 1, a
4751 // compute_padding() function must be
4752 // provided for the instruction
4753
4754 // Whether this node is expanded during code emission into a sequence of
4755 // instructions and the first instruction can perform an implicit null check.
4756 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4757
4758 //----------OPERANDS-----------------------------------------------------------
4759 // Operand definitions must precede instruction definitions for correct parsing
4760 // in the ADLC because operands constitute user defined types which are used in
4761 // instruction definitions.
4762
4763 //----------Simple Operands----------------------------------------------------
4764 // Immediate Operands
4765 // Integer Immediate
4766 operand immI()
4767 %{
4768 match(ConI);
4769
4770 op_cost(10);
4771 format %{ %}
4772 interface(CONST_INTER);
4773 %}
4774
4775 // Constant for test vs zero
4776 operand immI_0()
4777 %{
4778 predicate(n->get_int() == 0);
4779 match(ConI);
4780
4781 op_cost(0);
4782 format %{ %}
4783 interface(CONST_INTER);
4784 %}
4785
4786 // Constant for increment
4787 operand immI_1()
4788 %{
4789 predicate(n->get_int() == 1);
4790 match(ConI);
4791
4792 op_cost(0);
4793 format %{ %}
4794 interface(CONST_INTER);
4795 %}
4796
4797 // Constant for decrement
4798 operand immI_M1()
4799 %{
4800 predicate(n->get_int() == -1);
4801 match(ConI);
4802
4803 op_cost(0);
4804 format %{ %}
4805 interface(CONST_INTER);
4806 %}
4807
4808 operand immI_2()
4809 %{
4810 predicate(n->get_int() == 2);
4811 match(ConI);
4812
4813 op_cost(0);
4814 format %{ %}
4815 interface(CONST_INTER);
4816 %}
4817
4818 operand immI_4()
4819 %{
4820 predicate(n->get_int() == 4);
4821 match(ConI);
4822
4823 op_cost(0);
4824 format %{ %}
4825 interface(CONST_INTER);
4826 %}
4827
4828 operand immI_8()
4829 %{
4830 predicate(n->get_int() == 8);
4831 match(ConI);
4832
4833 op_cost(0);
4834 format %{ %}
4835 interface(CONST_INTER);
4836 %}
4837
4838 // Valid scale values for addressing modes
4839 operand immI2()
4840 %{
4841 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4842 match(ConI);
4843
4844 format %{ %}
4845 interface(CONST_INTER);
4846 %}
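// For example, a value of 3 selects the index*8 form: this operand supplies
// the SIB scale field in the scaled-index memory operands defined below
// (see indIndexScale and friends).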
4847
4848 operand immU7()
4849 %{
4850 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4851 match(ConI);
4852
4853 op_cost(5);
4854 format %{ %}
4855 interface(CONST_INTER);
4856 %}
4857
4858 operand immI8()
4859 %{
4860 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4861 match(ConI);
4862
4863 op_cost(5);
4864 format %{ %}
4865 interface(CONST_INTER);
4866 %}
4867
4868 operand immU8()
4869 %{
4870 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4871 match(ConI);
4872
4873 op_cost(5);
4874 format %{ %}
4875 interface(CONST_INTER);
4876 %}
4877
4878 operand immI16()
4879 %{
4880 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4881 match(ConI);
4882
4883 op_cost(10);
4884 format %{ %}
4885 interface(CONST_INTER);
4886 %}
4887
4888 // Int Immediate non-negative
4889 operand immU31()
4890 %{
4891 predicate(n->get_int() >= 0);
4892 match(ConI);
4893
4894 op_cost(0);
4895 format %{ %}
4896 interface(CONST_INTER);
4897 %}
4898
4899 // Pointer Immediate
4900 operand immP()
4901 %{
4902 match(ConP);
4903
4904 op_cost(10);
4905 format %{ %}
4906 interface(CONST_INTER);
4907 %}
4908
4909 // Null Pointer Immediate
4910 operand immP0()
4911 %{
4912 predicate(n->get_ptr() == 0);
4913 match(ConP);
4914
4915 op_cost(5);
4916 format %{ %}
4917 interface(CONST_INTER);
4918 %}
4919
4920 // Pointer Immediate
4921 operand immN() %{
4922 match(ConN);
4923
4924 op_cost(10);
4925 format %{ %}
4926 interface(CONST_INTER);
4927 %}
4928
4929 operand immNKlass() %{
4930 match(ConNKlass);
4931
4932 op_cost(10);
4933 format %{ %}
4934 interface(CONST_INTER);
4935 %}
4936
4937 // Null Pointer Immediate
4938 operand immN0() %{
4939 predicate(n->get_narrowcon() == 0);
4940 match(ConN);
4941
4942 op_cost(5);
4943 format %{ %}
4944 interface(CONST_INTER);
4945 %}
4946
4947 operand immP31()
4948 %{
4949 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4950 && (n->get_ptr() >> 31) == 0);
4951 match(ConP);
4952
4953 op_cost(5);
4954 format %{ %}
4955 interface(CONST_INTER);
4956 %}
4957
4958
4959 // Long Immediate
4960 operand immL()
4961 %{
4962 match(ConL);
4963
4964 op_cost(20);
4965 format %{ %}
4966 interface(CONST_INTER);
4967 %}
4968
4969 // Long Immediate 8-bit
4970 operand immL8()
4971 %{
4972 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4973 match(ConL);
4974
4975 op_cost(5);
4976 format %{ %}
4977 interface(CONST_INTER);
4978 %}
4979
4980 // Long Immediate 32-bit unsigned
4981 operand immUL32()
4982 %{
4983 predicate(n->get_long() == (unsigned int) (n->get_long()));
4984 match(ConL);
4985
4986 op_cost(10);
4987 format %{ %}
4988 interface(CONST_INTER);
4989 %}
4990
4991 // Long Immediate 32-bit signed
4992 operand immL32()
4993 %{
4994 predicate(n->get_long() == (int) (n->get_long()));
4995 match(ConL);
4996
4997 op_cost(15);
4998 format %{ %}
4999 interface(CONST_INTER);
5000 %}
5001
5002 operand immL_Pow2()
5003 %{
5004 predicate(is_power_of_2((julong)n->get_long()));
5005 match(ConL);
5006
5007 op_cost(15);
5008 format %{ %}
5009 interface(CONST_INTER);
5010 %}
5011
5012 operand immL_NotPow2()
5013 %{
5014 predicate(is_power_of_2((julong)~n->get_long()));
5015 match(ConL);
5016
5017 op_cost(15);
5018 format %{ %}
5019 interface(CONST_INTER);
5020 %}
5021
5022 // Long Immediate zero
5023 operand immL0()
5024 %{
5025 predicate(n->get_long() == 0L);
5026 match(ConL);
5027
5028 op_cost(10);
5029 format %{ %}
5030 interface(CONST_INTER);
5031 %}
5032
5033 // Constant for increment
5034 operand immL1()
5035 %{
5036 predicate(n->get_long() == 1);
5037 match(ConL);
5038
5039 format %{ %}
5040 interface(CONST_INTER);
5041 %}
5042
5043 // Constant for decrement
5044 operand immL_M1()
5045 %{
5046 predicate(n->get_long() == -1);
5047 match(ConL);
5048
5049 format %{ %}
5050 interface(CONST_INTER);
5051 %}
5052
5053 // Long Immediate: low 32-bit mask
5054 operand immL_32bits()
5055 %{
5056 predicate(n->get_long() == 0xFFFFFFFFL);
5057 match(ConL);
5058 op_cost(20);
5059
5060 format %{ %}
5061 interface(CONST_INTER);
5062 %}
5063
5064 // Int Immediate: 2^n-1, positive
5065 operand immI_Pow2M1()
5066 %{
5067 predicate((n->get_int() > 0)
5068 && is_power_of_2((juint)n->get_int() + 1));
5069 match(ConI);
5070
5071 op_cost(20);
5072 format %{ %}
5073 interface(CONST_INTER);
5074 %}
5075
5076 // Float Immediate zero
5077 operand immF0()
5078 %{
5079 predicate(jint_cast(n->getf()) == 0);
5080 match(ConF);
5081
5082 op_cost(5);
5083 format %{ %}
5084 interface(CONST_INTER);
5085 %}
5086
5087 // Float Immediate
5088 operand immF()
5089 %{
5090 match(ConF);
5091
5092 op_cost(15);
5093 format %{ %}
5094 interface(CONST_INTER);
5095 %}
5096
5097 // Half Float Immediate
5098 operand immH()
5099 %{
5100 match(ConH);
5101
5102 op_cost(15);
5103 format %{ %}
5104 interface(CONST_INTER);
5105 %}
5106
5107 // Double Immediate zero
5108 operand immD0()
5109 %{
5110 predicate(jlong_cast(n->getd()) == 0);
5111 match(ConD);
5112
5113 op_cost(5);
5114 format %{ %}
5115 interface(CONST_INTER);
5116 %}
5117
5118 // Double Immediate
5119 operand immD()
5120 %{
5121 match(ConD);
5122
5123 op_cost(15);
5124 format %{ %}
5125 interface(CONST_INTER);
5126 %}
5127
5128 // Immediates for special shifts (sign extend)
5129
// Constants for sign-extension shift counts
5131 operand immI_16()
5132 %{
5133 predicate(n->get_int() == 16);
5134 match(ConI);
5135
5136 format %{ %}
5137 interface(CONST_INTER);
5138 %}
5139
5140 operand immI_24()
5141 %{
5142 predicate(n->get_int() == 24);
5143 match(ConI);
5144
5145 format %{ %}
5146 interface(CONST_INTER);
5147 %}
5148
5149 // Constant for byte-wide masking
5150 operand immI_255()
5151 %{
5152 predicate(n->get_int() == 255);
5153 match(ConI);
5154
5155 format %{ %}
5156 interface(CONST_INTER);
5157 %}
5158
5159 // Constant for short-wide masking
5160 operand immI_65535()
5161 %{
5162 predicate(n->get_int() == 65535);
5163 match(ConI);
5164
5165 format %{ %}
5166 interface(CONST_INTER);
5167 %}
5168
5169 // Constant for byte-wide masking
5170 operand immL_255()
5171 %{
5172 predicate(n->get_long() == 255);
5173 match(ConL);
5174
5175 format %{ %}
5176 interface(CONST_INTER);
5177 %}
5178
5179 // Constant for short-wide masking
5180 operand immL_65535()
5181 %{
5182 predicate(n->get_long() == 65535);
5183 match(ConL);
5184
5185 format %{ %}
5186 interface(CONST_INTER);
5187 %}
5188
5189 operand kReg()
5190 %{
5191 constraint(ALLOC_IN_RC(vectmask_reg));
5192 match(RegVectMask);
5193 format %{%}
5194 interface(REG_INTER);
5195 %}
5196
5197 // Register Operands
5198 // Integer Register
5199 operand rRegI()
5200 %{
5201 constraint(ALLOC_IN_RC(int_reg));
5202 match(RegI);
5203
5204 match(rax_RegI);
5205 match(rbx_RegI);
5206 match(rcx_RegI);
5207 match(rdx_RegI);
5208 match(rdi_RegI);
5209
5210 format %{ %}
5211 interface(REG_INTER);
5212 %}
5213
5214 // Special Registers
5215 operand rax_RegI()
5216 %{
5217 constraint(ALLOC_IN_RC(int_rax_reg));
5218 match(RegI);
5219 match(rRegI);
5220
5221 format %{ "RAX" %}
5222 interface(REG_INTER);
5223 %}
5224
5225 // Special Registers
5226 operand rbx_RegI()
5227 %{
5228 constraint(ALLOC_IN_RC(int_rbx_reg));
5229 match(RegI);
5230 match(rRegI);
5231
5232 format %{ "RBX" %}
5233 interface(REG_INTER);
5234 %}
5235
5236 operand rcx_RegI()
5237 %{
5238 constraint(ALLOC_IN_RC(int_rcx_reg));
5239 match(RegI);
5240 match(rRegI);
5241
5242 format %{ "RCX" %}
5243 interface(REG_INTER);
5244 %}
5245
5246 operand rdx_RegI()
5247 %{
5248 constraint(ALLOC_IN_RC(int_rdx_reg));
5249 match(RegI);
5250 match(rRegI);
5251
5252 format %{ "RDX" %}
5253 interface(REG_INTER);
5254 %}
5255
5256 operand rdi_RegI()
5257 %{
5258 constraint(ALLOC_IN_RC(int_rdi_reg));
5259 match(RegI);
5260 match(rRegI);
5261
5262 format %{ "RDI" %}
5263 interface(REG_INTER);
5264 %}
5265
5266 operand no_rax_rdx_RegI()
5267 %{
5268 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5269 match(RegI);
5270 match(rbx_RegI);
5271 match(rcx_RegI);
5272 match(rdi_RegI);
5273
5274 format %{ %}
5275 interface(REG_INTER);
5276 %}
5277
5278 operand no_rbp_r13_RegI()
5279 %{
5280 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5281 match(RegI);
5282 match(rRegI);
5283 match(rax_RegI);
5284 match(rbx_RegI);
5285 match(rcx_RegI);
5286 match(rdx_RegI);
5287 match(rdi_RegI);
5288
5289 format %{ %}
5290 interface(REG_INTER);
5291 %}
5292
5293 // Pointer Register
5294 operand any_RegP()
5295 %{
5296 constraint(ALLOC_IN_RC(any_reg));
5297 match(RegP);
5298 match(rax_RegP);
5299 match(rbx_RegP);
5300 match(rdi_RegP);
5301 match(rsi_RegP);
5302 match(rbp_RegP);
5303 match(r15_RegP);
5304 match(rRegP);
5305
5306 format %{ %}
5307 interface(REG_INTER);
5308 %}
5309
5310 operand rRegP()
5311 %{
5312 constraint(ALLOC_IN_RC(ptr_reg));
5313 match(RegP);
5314 match(rax_RegP);
5315 match(rbx_RegP);
5316 match(rdi_RegP);
5317 match(rsi_RegP);
5318 match(rbp_RegP); // See Q&A below about
5319 match(r15_RegP); // r15_RegP and rbp_RegP.
5320
5321 format %{ %}
5322 interface(REG_INTER);
5323 %}
5324
5325 operand rRegN() %{
5326 constraint(ALLOC_IN_RC(int_reg));
5327 match(RegN);
5328
5329 format %{ %}
5330 interface(REG_INTER);
5331 %}
5332
5333 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5334 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, the allocator will never
// assign r15 to an instruction's result.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer==true, RBP is used as a proper frame pointer and is not
// included in ptr_reg. As a result, RBP is not included in the instruction's
// output either.
5343
5344 // This operand is not allowed to use RBP even if
5345 // RBP is not used to hold the frame pointer.
5346 operand no_rbp_RegP()
5347 %{
5348 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5349 match(RegP);
5350 match(rbx_RegP);
5351 match(rsi_RegP);
5352 match(rdi_RegP);
5353
5354 format %{ %}
5355 interface(REG_INTER);
5356 %}
5357
5358 // Special Registers
5359 // Return a pointer value
5360 operand rax_RegP()
5361 %{
5362 constraint(ALLOC_IN_RC(ptr_rax_reg));
5363 match(RegP);
5364 match(rRegP);
5365
5366 format %{ %}
5367 interface(REG_INTER);
5368 %}
5369
5370 // Special Registers
5371 // Return a compressed pointer value
5372 operand rax_RegN()
5373 %{
5374 constraint(ALLOC_IN_RC(int_rax_reg));
5375 match(RegN);
5376 match(rRegN);
5377
5378 format %{ %}
5379 interface(REG_INTER);
5380 %}
5381
5382 // Used in AtomicAdd
5383 operand rbx_RegP()
5384 %{
5385 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5386 match(RegP);
5387 match(rRegP);
5388
5389 format %{ %}
5390 interface(REG_INTER);
5391 %}
5392
5393 operand rsi_RegP()
5394 %{
5395 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5396 match(RegP);
5397 match(rRegP);
5398
5399 format %{ %}
5400 interface(REG_INTER);
5401 %}
5402
5403 operand rbp_RegP()
5404 %{
5405 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5406 match(RegP);
5407 match(rRegP);
5408
5409 format %{ %}
5410 interface(REG_INTER);
5411 %}
5412
5413 // Used in rep stosq
5414 operand rdi_RegP()
5415 %{
5416 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5417 match(RegP);
5418 match(rRegP);
5419
5420 format %{ %}
5421 interface(REG_INTER);
5422 %}
5423
5424 operand r15_RegP()
5425 %{
5426 constraint(ALLOC_IN_RC(ptr_r15_reg));
5427 match(RegP);
5428 match(rRegP);
5429
5430 format %{ %}
5431 interface(REG_INTER);
5432 %}
5433
5434 operand rRegL()
5435 %{
5436 constraint(ALLOC_IN_RC(long_reg));
5437 match(RegL);
5438 match(rax_RegL);
5439 match(rdx_RegL);
5440
5441 format %{ %}
5442 interface(REG_INTER);
5443 %}
5444
5445 // Special Registers
5446 operand no_rax_rdx_RegL()
5447 %{
5448 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5449 match(RegL);
5450 match(rRegL);
5451
5452 format %{ %}
5453 interface(REG_INTER);
5454 %}
5455
5456 operand rax_RegL()
5457 %{
5458 constraint(ALLOC_IN_RC(long_rax_reg));
5459 match(RegL);
5460 match(rRegL);
5461
5462 format %{ "RAX" %}
5463 interface(REG_INTER);
5464 %}
5465
5466 operand rcx_RegL()
5467 %{
5468 constraint(ALLOC_IN_RC(long_rcx_reg));
5469 match(RegL);
5470 match(rRegL);
5471
5472 format %{ %}
5473 interface(REG_INTER);
5474 %}
5475
5476 operand rdx_RegL()
5477 %{
5478 constraint(ALLOC_IN_RC(long_rdx_reg));
5479 match(RegL);
5480 match(rRegL);
5481
5482 format %{ %}
5483 interface(REG_INTER);
5484 %}
5485
5486 operand r11_RegL()
5487 %{
5488 constraint(ALLOC_IN_RC(long_r11_reg));
5489 match(RegL);
5490 match(rRegL);
5491
5492 format %{ %}
5493 interface(REG_INTER);
5494 %}
5495
5496 operand no_rbp_r13_RegL()
5497 %{
5498 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5499 match(RegL);
5500 match(rRegL);
5501 match(rax_RegL);
5502 match(rcx_RegL);
5503 match(rdx_RegL);
5504
5505 format %{ %}
5506 interface(REG_INTER);
5507 %}
5508
5509 // Flags register, used as output of compare instructions
5510 operand rFlagsReg()
5511 %{
5512 constraint(ALLOC_IN_RC(int_flags));
5513 match(RegFlags);
5514
5515 format %{ "RFLAGS" %}
5516 interface(REG_INTER);
5517 %}
5518
5519 // Flags register, used as output of FLOATING POINT compare instructions
5520 operand rFlagsRegU()
5521 %{
5522 constraint(ALLOC_IN_RC(int_flags));
5523 match(RegFlags);
5524
5525 format %{ "RFLAGS_U" %}
5526 interface(REG_INTER);
5527 %}
5528
5529 operand rFlagsRegUCF() %{
5530 constraint(ALLOC_IN_RC(int_flags));
5531 match(RegFlags);
5532 predicate(false);
5533
5534 format %{ "RFLAGS_U_CF" %}
5535 interface(REG_INTER);
5536 %}
5537
5538 // Float register operands
5539 operand regF() %{
5540 constraint(ALLOC_IN_RC(float_reg));
5541 match(RegF);
5542
5543 format %{ %}
5544 interface(REG_INTER);
5545 %}
5546
5547 // Float register operands
5548 operand legRegF() %{
5549 constraint(ALLOC_IN_RC(float_reg_legacy));
5550 match(RegF);
5551
5552 format %{ %}
5553 interface(REG_INTER);
5554 %}
5555
5556 // Float register operands
5557 operand vlRegF() %{
5558 constraint(ALLOC_IN_RC(float_reg_vl));
5559 match(RegF);
5560
5561 format %{ %}
5562 interface(REG_INTER);
5563 %}
5564
5565 // Double register operands
5566 operand regD() %{
5567 constraint(ALLOC_IN_RC(double_reg));
5568 match(RegD);
5569
5570 format %{ %}
5571 interface(REG_INTER);
5572 %}
5573
5574 // Double register operands
5575 operand legRegD() %{
5576 constraint(ALLOC_IN_RC(double_reg_legacy));
5577 match(RegD);
5578
5579 format %{ %}
5580 interface(REG_INTER);
5581 %}
5582
5583 // Double register operands
5584 operand vlRegD() %{
5585 constraint(ALLOC_IN_RC(double_reg_vl));
5586 match(RegD);
5587
5588 format %{ %}
5589 interface(REG_INTER);
5590 %}
5591
5592 //----------Memory Operands----------------------------------------------------
5593 // Direct Memory Operand
5594 // operand direct(immP addr)
5595 // %{
5596 // match(addr);
5597
5598 // format %{ "[$addr]" %}
5599 // interface(MEMORY_INTER) %{
5600 // base(0xFFFFFFFF);
5601 // index(0x4);
5602 // scale(0x0);
5603 // disp($addr);
5604 // %}
5605 // %}
5606
5607 // Indirect Memory Operand
5608 operand indirect(any_RegP reg)
5609 %{
5610 constraint(ALLOC_IN_RC(ptr_reg));
5611 match(reg);
5612
5613 format %{ "[$reg]" %}
5614 interface(MEMORY_INTER) %{
5615 base($reg);
5616 index(0x4);
5617 scale(0x0);
5618 disp(0x0);
5619 %}
5620 %}
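// Note on the interface encodings here and below: in the x86 SIB byte an
// index field of 0x4 (the RSP encoding) means "no index register", which is
// why index-free operands specify index(0x4).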
5621
5622 // Indirect Memory Plus Short Offset Operand
5623 operand indOffset8(any_RegP reg, immL8 off)
5624 %{
5625 constraint(ALLOC_IN_RC(ptr_reg));
5626 match(AddP reg off);
5627
5628 format %{ "[$reg + $off (8-bit)]" %}
5629 interface(MEMORY_INTER) %{
5630 base($reg);
5631 index(0x4);
5632 scale(0x0);
5633 disp($off);
5634 %}
5635 %}
5636
5637 // Indirect Memory Plus Long Offset Operand
5638 operand indOffset32(any_RegP reg, immL32 off)
5639 %{
5640 constraint(ALLOC_IN_RC(ptr_reg));
5641 match(AddP reg off);
5642
5643 format %{ "[$reg + $off (32-bit)]" %}
5644 interface(MEMORY_INTER) %{
5645 base($reg);
5646 index(0x4);
5647 scale(0x0);
5648 disp($off);
5649 %}
5650 %}
5651
5652 // Indirect Memory Plus Index Register Plus Offset Operand
5653 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5654 %{
5655 constraint(ALLOC_IN_RC(ptr_reg));
5656 match(AddP (AddP reg lreg) off);
5657
5658 op_cost(10);
5659 format %{"[$reg + $off + $lreg]" %}
5660 interface(MEMORY_INTER) %{
5661 base($reg);
5662 index($lreg);
5663 scale(0x0);
5664 disp($off);
5665 %}
5666 %}
5667
5668 // Indirect Memory Plus Index Register Plus Offset Operand
5669 operand indIndex(any_RegP reg, rRegL lreg)
5670 %{
5671 constraint(ALLOC_IN_RC(ptr_reg));
5672 match(AddP reg lreg);
5673
5674 op_cost(10);
5675 format %{"[$reg + $lreg]" %}
5676 interface(MEMORY_INTER) %{
5677 base($reg);
5678 index($lreg);
5679 scale(0x0);
5680 disp(0x0);
5681 %}
5682 %}
5683
5684 // Indirect Memory Times Scale Plus Index Register
5685 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5686 %{
5687 constraint(ALLOC_IN_RC(ptr_reg));
5688 match(AddP reg (LShiftL lreg scale));
5689
5690 op_cost(10);
5691 format %{"[$reg + $lreg << $scale]" %}
5692 interface(MEMORY_INTER) %{
5693 base($reg);
5694 index($lreg);
5695 scale($scale);
5696 disp(0x0);
5697 %}
5698 %}
5699
5700 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5701 %{
5702 constraint(ALLOC_IN_RC(ptr_reg));
5703 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5704 match(AddP reg (LShiftL (ConvI2L idx) scale));
5705
5706 op_cost(10);
5707 format %{"[$reg + pos $idx << $scale]" %}
5708 interface(MEMORY_INTER) %{
5709 base($reg);
5710 index($idx);
5711 scale($scale);
5712 disp(0x0);
5713 %}
5714 %}
5715
5716 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5717 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5718 %{
5719 constraint(ALLOC_IN_RC(ptr_reg));
5720 match(AddP (AddP reg (LShiftL lreg scale)) off);
5721
5722 op_cost(10);
5723 format %{"[$reg + $off + $lreg << $scale]" %}
5724 interface(MEMORY_INTER) %{
5725 base($reg);
5726 index($lreg);
5727 scale($scale);
5728 disp($off);
5729 %}
5730 %}
5731
5732 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5733 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5734 %{
5735 constraint(ALLOC_IN_RC(ptr_reg));
5736 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5737 match(AddP (AddP reg (ConvI2L idx)) off);
5738
5739 op_cost(10);
5740 format %{"[$reg + $off + $idx]" %}
5741 interface(MEMORY_INTER) %{
5742 base($reg);
5743 index($idx);
5744 scale(0x0);
5745 disp($off);
5746 %}
5747 %}
5748
5749 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5750 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5751 %{
5752 constraint(ALLOC_IN_RC(ptr_reg));
5753 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5754 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5755
5756 op_cost(10);
5757 format %{"[$reg + $off + $idx << $scale]" %}
5758 interface(MEMORY_INTER) %{
5759 base($reg);
5760 index($idx);
5761 scale($scale);
5762 disp($off);
5763 %}
5764 %}
5765
5766 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base register, so we can't free r12 even with CompressedOops::base() == nullptr.
5769 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5770 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5771 constraint(ALLOC_IN_RC(ptr_reg));
5772 match(AddP (DecodeN reg) off);
5773
5774 op_cost(10);
5775 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5776 interface(MEMORY_INTER) %{
5777 base(0xc); // R12
5778 index($reg);
5779 scale(0x3);
5780 disp($off);
5781 %}
5782 %}
5783
5784 // Indirect Memory Operand
5785 operand indirectNarrow(rRegN reg)
5786 %{
5787 predicate(CompressedOops::shift() == 0);
5788 constraint(ALLOC_IN_RC(ptr_reg));
5789 match(DecodeN reg);
5790
5791 format %{ "[$reg]" %}
5792 interface(MEMORY_INTER) %{
5793 base($reg);
5794 index(0x4);
5795 scale(0x0);
5796 disp(0x0);
5797 %}
5798 %}
5799
5800 // Indirect Memory Plus Short Offset Operand
5801 operand indOffset8Narrow(rRegN reg, immL8 off)
5802 %{
5803 predicate(CompressedOops::shift() == 0);
5804 constraint(ALLOC_IN_RC(ptr_reg));
5805 match(AddP (DecodeN reg) off);
5806
5807 format %{ "[$reg + $off (8-bit)]" %}
5808 interface(MEMORY_INTER) %{
5809 base($reg);
5810 index(0x4);
5811 scale(0x0);
5812 disp($off);
5813 %}
5814 %}
5815
5816 // Indirect Memory Plus Long Offset Operand
5817 operand indOffset32Narrow(rRegN reg, immL32 off)
5818 %{
5819 predicate(CompressedOops::shift() == 0);
5820 constraint(ALLOC_IN_RC(ptr_reg));
5821 match(AddP (DecodeN reg) off);
5822
5823 format %{ "[$reg + $off (32-bit)]" %}
5824 interface(MEMORY_INTER) %{
5825 base($reg);
5826 index(0x4);
5827 scale(0x0);
5828 disp($off);
5829 %}
5830 %}
5831
5832 // Indirect Memory Plus Index Register Plus Offset Operand
5833 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5834 %{
5835 predicate(CompressedOops::shift() == 0);
5836 constraint(ALLOC_IN_RC(ptr_reg));
5837 match(AddP (AddP (DecodeN reg) lreg) off);
5838
5839 op_cost(10);
5840 format %{"[$reg + $off + $lreg]" %}
5841 interface(MEMORY_INTER) %{
5842 base($reg);
5843 index($lreg);
5844 scale(0x0);
5845 disp($off);
5846 %}
5847 %}
5848
5849 // Indirect Memory Plus Index Register Plus Offset Operand
5850 operand indIndexNarrow(rRegN reg, rRegL lreg)
5851 %{
5852 predicate(CompressedOops::shift() == 0);
5853 constraint(ALLOC_IN_RC(ptr_reg));
5854 match(AddP (DecodeN reg) lreg);
5855
5856 op_cost(10);
5857 format %{"[$reg + $lreg]" %}
5858 interface(MEMORY_INTER) %{
5859 base($reg);
5860 index($lreg);
5861 scale(0x0);
5862 disp(0x0);
5863 %}
5864 %}
5865
5866 // Indirect Memory Times Scale Plus Index Register
5867 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5868 %{
5869 predicate(CompressedOops::shift() == 0);
5870 constraint(ALLOC_IN_RC(ptr_reg));
5871 match(AddP (DecodeN reg) (LShiftL lreg scale));
5872
5873 op_cost(10);
5874 format %{"[$reg + $lreg << $scale]" %}
5875 interface(MEMORY_INTER) %{
5876 base($reg);
5877 index($lreg);
5878 scale($scale);
5879 disp(0x0);
5880 %}
5881 %}
5882
5883 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5884 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5885 %{
5886 predicate(CompressedOops::shift() == 0);
5887 constraint(ALLOC_IN_RC(ptr_reg));
5888 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5889
5890 op_cost(10);
5891 format %{"[$reg + $off + $lreg << $scale]" %}
5892 interface(MEMORY_INTER) %{
5893 base($reg);
5894 index($lreg);
5895 scale($scale);
5896 disp($off);
5897 %}
5898 %}
5899
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5901 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5902 %{
5903 constraint(ALLOC_IN_RC(ptr_reg));
5904 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5905 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5906
5907 op_cost(10);
5908 format %{"[$reg + $off + $idx]" %}
5909 interface(MEMORY_INTER) %{
5910 base($reg);
5911 index($idx);
5912 scale(0x0);
5913 disp($off);
5914 %}
5915 %}
5916
5917 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5918 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5919 %{
5920 constraint(ALLOC_IN_RC(ptr_reg));
5921 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5922 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5923
5924 op_cost(10);
5925 format %{"[$reg + $off + $idx << $scale]" %}
5926 interface(MEMORY_INTER) %{
5927 base($reg);
5928 index($idx);
5929 scale($scale);
5930 disp($off);
5931 %}
5932 %}
5933
5934 //----------Special Memory Operands--------------------------------------------
5935 // Stack Slot Operand - This operand is used for loading and storing temporary
5936 // values on the stack where a match requires a value to
5937 // flow through memory.
5938 operand stackSlotP(sRegP reg)
5939 %{
5940 constraint(ALLOC_IN_RC(stack_slots));
5941 // No match rule because this operand is only generated in matching
5942
5943 format %{ "[$reg]" %}
5944 interface(MEMORY_INTER) %{
5945 base(0x4); // RSP
5946 index(0x4); // No Index
5947 scale(0x0); // No Scale
5948 disp($reg); // Stack Offset
5949 %}
5950 %}
5951
5952 operand stackSlotI(sRegI reg)
5953 %{
5954 constraint(ALLOC_IN_RC(stack_slots));
5955 // No match rule because this operand is only generated in matching
5956
5957 format %{ "[$reg]" %}
5958 interface(MEMORY_INTER) %{
5959 base(0x4); // RSP
5960 index(0x4); // No Index
5961 scale(0x0); // No Scale
5962 disp($reg); // Stack Offset
5963 %}
5964 %}
5965
5966 operand stackSlotF(sRegF reg)
5967 %{
5968 constraint(ALLOC_IN_RC(stack_slots));
5969 // No match rule because this operand is only generated in matching
5970
5971 format %{ "[$reg]" %}
5972 interface(MEMORY_INTER) %{
5973 base(0x4); // RSP
5974 index(0x4); // No Index
5975 scale(0x0); // No Scale
5976 disp($reg); // Stack Offset
5977 %}
5978 %}
5979
5980 operand stackSlotD(sRegD reg)
5981 %{
5982 constraint(ALLOC_IN_RC(stack_slots));
5983 // No match rule because this operand is only generated in matching
5984
5985 format %{ "[$reg]" %}
5986 interface(MEMORY_INTER) %{
5987 base(0x4); // RSP
5988 index(0x4); // No Index
5989 scale(0x0); // No Scale
5990 disp($reg); // Stack Offset
5991 %}
5992 %}

operand stackSlotL(sRegL reg)
5994 %{
5995 constraint(ALLOC_IN_RC(stack_slots));
5996 // No match rule because this operand is only generated in matching
5997
5998 format %{ "[$reg]" %}
5999 interface(MEMORY_INTER) %{
6000 base(0x4); // RSP
6001 index(0x4); // No Index
6002 scale(0x0); // No Scale
6003 disp($reg); // Stack Offset
6004 %}
6005 %}
6006
6007 //----------Conditional Branch Operands----------------------------------------
6008 // Comparison Op - This is the operation of the comparison, and is limited to
6009 // the following set of codes:
6010 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6011 //
6012 // Other attributes of the comparison, such as unsignedness, are specified
6013 // by the comparison instruction that sets a condition code flags register.
6014 // That result is represented by a flags operand whose subtype is appropriate
6015 // to the unsignedness (etc.) of the comparison.
6016 //
6017 // Later, the instruction which matches both the Comparison Op (a Bool) and
6018 // the flags (produced by the Cmp) specifies the coding of the comparison op
6019 // by matching a specific subtype of Bool operand below, such as cmpOpU.
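// For example, a signed less-than test (BoolTest::lt) selects the "less"
// entry of cmpOp below, whose encoding 0xC is the x86 condition code used
// by JL/CMOVL (short-jump opcode 0x7C).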
6020
6021 // Comparison Code
6022 operand cmpOp()
6023 %{
6024 match(Bool);
6025
6026 format %{ "" %}
6027 interface(COND_INTER) %{
6028 equal(0x4, "e");
6029 not_equal(0x5, "ne");
6030 less(0xC, "l");
6031 greater_equal(0xD, "ge");
6032 less_equal(0xE, "le");
6033 greater(0xF, "g");
6034 overflow(0x0, "o");
6035 no_overflow(0x1, "no");
6036 %}
6037 %}
6038
6039 // Comparison Code, unsigned compare. Used by FP also, with
6040 // C2 (unordered) turned into GT or LT already. The other bits
6041 // C0 and C3 are turned into Carry & Zero flags.
6042 operand cmpOpU()
6043 %{
6044 match(Bool);
6045
6046 format %{ "" %}
6047 interface(COND_INTER) %{
6048 equal(0x4, "e");
6049 not_equal(0x5, "ne");
6050 less(0x2, "b");
6051 greater_equal(0x3, "ae");
6052 less_equal(0x6, "be");
6053 greater(0x7, "a");
6054 overflow(0x0, "o");
6055 no_overflow(0x1, "no");
6056 %}
6057 %}
6058
6059
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6063 operand cmpOpUCF() %{
6064 match(Bool);
6065 predicate(n->as_Bool()->_test._test == BoolTest::lt ||
6066 n->as_Bool()->_test._test == BoolTest::ge ||
6067 n->as_Bool()->_test._test == BoolTest::le ||
6068 n->as_Bool()->_test._test == BoolTest::gt ||
6069 n->in(1)->in(1) == n->in(1)->in(2));
6070 format %{ "" %}
6071 interface(COND_INTER) %{
6072 equal(0xb, "np");
6073 not_equal(0xa, "p");
6074 less(0x2, "b");
6075 greater_equal(0x3, "ae");
6076 less_equal(0x6, "be");
6077 greater(0x7, "a");
6078 overflow(0x0, "o");
6079 no_overflow(0x1, "no");
6080 %}
6081 %}
6082
6083
6084 // Floating comparisons that can be fixed up with extra conditional jumps
6085 operand cmpOpUCF2() %{
6086 match(Bool);
6087 predicate((n->as_Bool()->_test._test == BoolTest::ne ||
6088 n->as_Bool()->_test._test == BoolTest::eq) &&
6089 n->in(1)->in(1) != n->in(1)->in(2));
6090 format %{ "" %}
6091 interface(COND_INTER) %{
6092 equal(0x4, "e");
6093 not_equal(0x5, "ne");
6094 less(0x2, "b");
6095 greater_equal(0x3, "ae");
6096 less_equal(0x6, "be");
6097 greater(0x7, "a");
6098 overflow(0x0, "o");
6099 no_overflow(0x1, "no");
6100 %}
6101 %}
6102
// Operands for bound floating point register arguments
6104 operand rxmm0() %{
6105 constraint(ALLOC_IN_RC(xmm0_reg));
6106 match(VecX);
6107 format%{%}
6108 interface(REG_INTER);
6109 %}
6110
6111 // Vectors
6112
6113 // Dummy generic vector class. Should be used for all vector operands.
6114 // Replaced with vec[SDXYZ] during post-selection pass.
6115 operand vec() %{
6116 constraint(ALLOC_IN_RC(dynamic));
6117 match(VecX);
6118 match(VecY);
6119 match(VecZ);
6120 match(VecS);
6121 match(VecD);
6122
6123 format %{ %}
6124 interface(REG_INTER);
6125 %}
6126
6127 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6128 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6129 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6130 // runtime code generation via reg_class_dynamic.
6131 operand legVec() %{
6132 constraint(ALLOC_IN_RC(dynamic));
6133 match(VecX);
6134 match(VecY);
6135 match(VecZ);
6136 match(VecS);
6137 match(VecD);
6138
6139 format %{ %}
6140 interface(REG_INTER);
6141 %}
6142
6143 // Replaces vec during post-selection cleanup. See above.
6144 operand vecS() %{
6145 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6146 match(VecS);
6147
6148 format %{ %}
6149 interface(REG_INTER);
6150 %}
6151
6152 // Replaces legVec during post-selection cleanup. See above.
6153 operand legVecS() %{
6154 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6155 match(VecS);
6156
6157 format %{ %}
6158 interface(REG_INTER);
6159 %}
6160
6161 // Replaces vec during post-selection cleanup. See above.
6162 operand vecD() %{
6163 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6164 match(VecD);
6165
6166 format %{ %}
6167 interface(REG_INTER);
6168 %}
6169
6170 // Replaces legVec during post-selection cleanup. See above.
6171 operand legVecD() %{
6172 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6173 match(VecD);
6174
6175 format %{ %}
6176 interface(REG_INTER);
6177 %}
6178
6179 // Replaces vec during post-selection cleanup. See above.
6180 operand vecX() %{
6181 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6182 match(VecX);
6183
6184 format %{ %}
6185 interface(REG_INTER);
6186 %}
6187
6188 // Replaces legVec during post-selection cleanup. See above.
6189 operand legVecX() %{
6190 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6191 match(VecX);
6192
6193 format %{ %}
6194 interface(REG_INTER);
6195 %}
6196
6197 // Replaces vec during post-selection cleanup. See above.
6198 operand vecY() %{
6199 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6200 match(VecY);
6201
6202 format %{ %}
6203 interface(REG_INTER);
6204 %}
6205
6206 // Replaces legVec during post-selection cleanup. See above.
6207 operand legVecY() %{
6208 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6209 match(VecY);
6210
6211 format %{ %}
6212 interface(REG_INTER);
6213 %}
6214
6215 // Replaces vec during post-selection cleanup. See above.
6216 operand vecZ() %{
6217 constraint(ALLOC_IN_RC(vectorz_reg));
6218 match(VecZ);
6219
6220 format %{ %}
6221 interface(REG_INTER);
6222 %}
6223
6224 // Replaces legVec during post-selection cleanup. See above.
6225 operand legVecZ() %{
6226 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6227 match(VecZ);
6228
6229 format %{ %}
6230 interface(REG_INTER);
6231 %}
6232
6233 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6235 // instruction definitions by not requiring the AD writer to specify separate
6236 // instructions for every form of operand when the instruction accepts
6237 // multiple operand types with the same basic encoding and format. The classic
6238 // case of this is memory operands.
6239
6240 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6241 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6242 indCompressedOopOffset,
6243 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6244 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6245 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
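// For illustration only (hypothetical rule, not defined at this point in the
// file): with the opclass above, a single
//
//   instruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ... %}
//
// matches every addressing form listed, instead of requiring one instruct
// per memory operand.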
6246
6247 //----------PIPELINE-----------------------------------------------------------
6248 // Rules which define the behavior of the target architectures pipeline.
6249 pipeline %{
6250
6251 //----------ATTRIBUTES---------------------------------------------------------
6252 attributes %{
variable_size_instructions; // Instructions are of variable size
6254 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
instruction_unit_size = 1; // An instruction is 1 byte long
6256 instruction_fetch_unit_size = 16; // The processor fetches one line
6257 instruction_fetch_units = 1; // of 16 bytes
6258 %}
6259
6260 //----------RESOURCES----------------------------------------------------------
6261 // Resources are the functional units available to the machine
6262
6263 // Generic P2/P3 pipeline
6264 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6265 // 3 instructions decoded per cycle.
6266 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops; only ALU0 handles mul instructions.
6268 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6269 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6270 BR, FPU,
6271 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6272
6273 //----------PIPELINE DESCRIPTION-----------------------------------------------
6274 // Pipeline Description specifies the stages in the machine's pipeline
6275
6276 // Generic P2/P3 pipeline
6277 pipe_desc(S0, S1, S2, S3, S4, S5);
6278
6279 //----------PIPELINE CLASSES---------------------------------------------------
6280 // Pipeline Classes describe the stages in which input and output are
6281 // referenced by the hardware pipeline.
6282
6283 // Naming convention: ialu or fpu
6284 // Then: _reg
6285 // Then: _reg if there is a 2nd register
6286 // Then: _long if it's a pair of instructions implementing a long
6287 // Then: _fat if it requires the big decoder
6288 // Or: _mem if it requires the big decoder and a memory unit.
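// For example, ialu_reg_mem below names an integer ALU operation with a
// register destination and a memory source, so it claims the big decoder
// (D0) and a memory unit in addition to an ALU.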
6289
6290 // Integer ALU reg operation
6291 pipe_class ialu_reg(rRegI dst)
6292 %{
6293 single_instruction;
6294 dst : S4(write);
6295 dst : S3(read);
6296 DECODE : S0; // any decoder
6297 ALU : S3; // any alu
6298 %}
6299
6300 // Long ALU reg operation
6301 pipe_class ialu_reg_long(rRegL dst)
6302 %{
6303 instruction_count(2);
6304 dst : S4(write);
6305 dst : S3(read);
6306 DECODE : S0(2); // any 2 decoders
6307 ALU : S3(2); // both alus
6308 %}
6309
6310 // Integer ALU reg operation using big decoder
6311 pipe_class ialu_reg_fat(rRegI dst)
6312 %{
6313 single_instruction;
6314 dst : S4(write);
6315 dst : S3(read);
6316 D0 : S0; // big decoder only
6317 ALU : S3; // any alu
6318 %}
6319
6320 // Integer ALU reg-reg operation
6321 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6322 %{
6323 single_instruction;
6324 dst : S4(write);
6325 src : S3(read);
6326 DECODE : S0; // any decoder
6327 ALU : S3; // any alu
6328 %}
6329
6330 // Integer ALU reg-reg operation
6331 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6332 %{
6333 single_instruction;
6334 dst : S4(write);
6335 src : S3(read);
6336 D0 : S0; // big decoder only
6337 ALU : S3; // any alu
6338 %}
6339
6340 // Integer ALU reg-mem operation
6341 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6342 %{
6343 single_instruction;
6344 dst : S5(write);
6345 mem : S3(read);
6346 D0 : S0; // big decoder only
6347 ALU : S4; // any alu
6348 MEM : S3; // any mem
6349 %}
6350
6351 // Integer mem operation (prefetch)
6352 pipe_class ialu_mem(memory mem)
6353 %{
6354 single_instruction;
6355 mem : S3(read);
6356 D0 : S0; // big decoder only
6357 MEM : S3; // any mem
6358 %}
6359
6360 // Integer Store to Memory
6361 pipe_class ialu_mem_reg(memory mem, rRegI src)
6362 %{
6363 single_instruction;
6364 mem : S3(read);
6365 src : S5(read);
6366 D0 : S0; // big decoder only
6367 ALU : S4; // any alu
6368 MEM : S3;
6369 %}
6370
6371 // // Long Store to Memory
6372 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6373 // %{
6374 // instruction_count(2);
6375 // mem : S3(read);
6376 // src : S5(read);
6377 // D0 : S0(2); // big decoder only; twice
6378 // ALU : S4(2); // any 2 alus
6379 // MEM : S3(2); // Both mems
6380 // %}
6381
6382 // Integer Store to Memory
6383 pipe_class ialu_mem_imm(memory mem)
6384 %{
6385 single_instruction;
6386 mem : S3(read);
6387 D0 : S0; // big decoder only
6388 ALU : S4; // any alu
6389 MEM : S3;
6390 %}
6391
6392 // Integer ALU0 reg-reg operation
6393 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6394 %{
6395 single_instruction;
6396 dst : S4(write);
6397 src : S3(read);
6398 D0 : S0; // Big decoder only
6399 ALU0 : S3; // only alu0
6400 %}
6401
6402 // Integer ALU0 reg-mem operation
6403 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6404 %{
6405 single_instruction;
6406 dst : S5(write);
6407 mem : S3(read);
6408 D0 : S0; // big decoder only
6409 ALU0 : S4; // ALU0 only
6410 MEM : S3; // any mem
6411 %}
6412
6413 // Integer ALU reg-reg operation
6414 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6415 %{
6416 single_instruction;
6417 cr : S4(write);
6418 src1 : S3(read);
6419 src2 : S3(read);
6420 DECODE : S0; // any decoder
6421 ALU : S3; // any alu
6422 %}
6423
6424 // Integer ALU reg-imm operation
6425 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6426 %{
6427 single_instruction;
6428 cr : S4(write);
6429 src1 : S3(read);
6430 DECODE : S0; // any decoder
6431 ALU : S3; // any alu
6432 %}
6433
6434 // Integer ALU reg-mem operation
6435 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6436 %{
6437 single_instruction;
6438 cr : S4(write);
6439 src1 : S3(read);
6440 src2 : S3(read);
6441 D0 : S0; // big decoder only
6442 ALU : S4; // any alu
6443 MEM : S3;
6444 %}
6445
6446 // Conditional move reg-reg
6447 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6448 %{
6449 instruction_count(4);
6450 y : S4(read);
6451 q : S3(read);
6452 p : S3(read);
DECODE : S0(4); // any 4 decoders
6454 %}
6455
6456 // Conditional move reg-reg
6457 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6458 %{
6459 single_instruction;
6460 dst : S4(write);
6461 src : S3(read);
6462 cr : S3(read);
6463 DECODE : S0; // any decoder
6464 %}
6465
6466 // Conditional move reg-mem
6467 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6468 %{
6469 single_instruction;
6470 dst : S4(write);
6471 src : S3(read);
6472 cr : S3(read);
6473 DECODE : S0; // any decoder
6474 MEM : S3;
6475 %}
6476
6477 // Conditional move reg-reg long
6478 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6479 %{
6480 single_instruction;
6481 dst : S4(write);
6482 src : S3(read);
6483 cr : S3(read);
6484 DECODE : S0(2); // any 2 decoders
6485 %}
6486
6487 // Float reg-reg operation
6488 pipe_class fpu_reg(regD dst)
6489 %{
6490 instruction_count(2);
6491 dst : S3(read);
6492 DECODE : S0(2); // any 2 decoders
6493 FPU : S3;
6494 %}
6495
6496 // Float reg-reg operation
6497 pipe_class fpu_reg_reg(regD dst, regD src)
6498 %{
6499 instruction_count(2);
6500 dst : S4(write);
6501 src : S3(read);
6502 DECODE : S0(2); // any 2 decoders
6503 FPU : S3;
6504 %}
6505
6506 // Float reg-reg operation
6507 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6508 %{
6509 instruction_count(3);
6510 dst : S4(write);
6511 src1 : S3(read);
6512 src2 : S3(read);
6513 DECODE : S0(3); // any 3 decoders
6514 FPU : S3(2);
6515 %}
6516
6517 // Float reg-reg operation
6518 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6519 %{
6520 instruction_count(4);
6521 dst : S4(write);
6522 src1 : S3(read);
6523 src2 : S3(read);
6524 src3 : S3(read);
DECODE : S0(4); // any 4 decoders
6526 FPU : S3(2);
6527 %}
6528
6529 // Float reg-reg operation
6530 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6531 %{
6532 instruction_count(4);
6533 dst : S4(write);
6534 src1 : S3(read);
6535 src2 : S3(read);
6536 src3 : S3(read);
6537 DECODE : S1(3); // any 3 decoders
6538 D0 : S0; // Big decoder only
6539 FPU : S3(2);
6540 MEM : S3;
6541 %}
6542
6543 // Float reg-mem operation
6544 pipe_class fpu_reg_mem(regD dst, memory mem)
6545 %{
6546 instruction_count(2);
6547 dst : S5(write);
6548 mem : S3(read);
6549 D0 : S0; // big decoder only
6550 DECODE : S1; // any decoder for FPU POP
6551 FPU : S4;
6552 MEM : S3; // any mem
6553 %}
6554
6555 // Float reg-mem operation
6556 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6557 %{
6558 instruction_count(3);
6559 dst : S5(write);
6560 src1 : S3(read);
6561 mem : S3(read);
6562 D0 : S0; // big decoder only
6563 DECODE : S1(2); // any decoder for FPU POP
6564 FPU : S4;
6565 MEM : S3; // any mem
6566 %}
6567
6568 // Float mem-reg operation
6569 pipe_class fpu_mem_reg(memory mem, regD src)
6570 %{
6571 instruction_count(2);
6572 src : S5(read);
6573 mem : S3(read);
6574 DECODE : S0; // any decoder for FPU PUSH
6575 D0 : S1; // big decoder only
6576 FPU : S4;
6577 MEM : S3; // any mem
6578 %}
6579
6580 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6581 %{
6582 instruction_count(3);
6583 src1 : S3(read);
6584 src2 : S3(read);
6585 mem : S3(read);
6586 DECODE : S0(2); // any decoder for FPU PUSH
6587 D0 : S1; // big decoder only
6588 FPU : S4;
6589 MEM : S3; // any mem
6590 %}
6591
6592 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6593 %{
6594 instruction_count(3);
6595 src1 : S3(read);
6596 src2 : S3(read);
6597 mem : S4(read);
6598 DECODE : S0; // any decoder for FPU PUSH
6599 D0 : S0(2); // big decoder only
6600 FPU : S4;
6601 MEM : S3(2); // any mem
6602 %}
6603
6604 pipe_class fpu_mem_mem(memory dst, memory src1)
6605 %{
6606 instruction_count(2);
6607 src1 : S3(read);
6608 dst : S4(read);
6609 D0 : S0(2); // big decoder only
6610 MEM : S3(2); // any mem
6611 %}
6612
6613 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6614 %{
6615 instruction_count(3);
6616 src1 : S3(read);
6617 src2 : S3(read);
6618 dst : S4(read);
6619 D0 : S0(3); // big decoder only
6620 FPU : S4;
6621 MEM : S3(3); // any mem
6622 %}
6623
6624 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6625 %{
6626 instruction_count(3);
6627 src1 : S4(read);
6628 mem : S4(read);
6629 DECODE : S0; // any decoder for FPU PUSH
6630 D0 : S0(2); // big decoder only
6631 FPU : S4;
6632 MEM : S3(2); // any mem
6633 %}
6634
6635 // Float load constant
6636 pipe_class fpu_reg_con(regD dst)
6637 %{
6638 instruction_count(2);
6639 dst : S5(write);
6640 D0 : S0; // big decoder only for the load
6641 DECODE : S1; // any decoder for FPU POP
6642 FPU : S4;
6643 MEM : S3; // any mem
6644 %}
6645
6646 // Float load constant
6647 pipe_class fpu_reg_reg_con(regD dst, regD src)
6648 %{
6649 instruction_count(3);
6650 dst : S5(write);
6651 src : S3(read);
6652 D0 : S0; // big decoder only for the load
6653 DECODE : S1(2); // any 2 decoders for FPU POP
6654 FPU : S4;
6655 MEM : S3; // any mem
6656 %}
6657
6658 // Unconditional branch
6659 pipe_class pipe_jmp(label labl)
6660 %{
6661 single_instruction;
6662 BR : S3;
6663 %}
6664
6665 // Conditional branch
6666 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6667 %{
6668 single_instruction;
6669 cr : S1(read);
6670 BR : S3;
6671 %}
6672
6673 // Allocation idiom
6674 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6675 %{
6676 instruction_count(1); force_serialization;
6677 fixed_latency(6);
6678 heap_ptr : S3(read);
6679 DECODE : S0(3);
6680 D0 : S2;
6681 MEM : S3;
6682 ALU : S3(2);
6683 dst : S5(write);
6684 BR : S5;
6685 %}
6686
6687 // Generic big/slow expanded idiom
6688 pipe_class pipe_slow()
6689 %{
6690 instruction_count(10); multiple_bundles; force_serialization;
6691 fixed_latency(100);
6692 D0 : S0(2);
6693 MEM : S3(2);
6694 %}
6695
6696 // The real do-nothing guy
6697 pipe_class empty()
6698 %{
6699 instruction_count(0);
6700 %}
6701
6702 // Define the class for the Nop node
6703 define
6704 %{
6705 MachNop = empty;
6706 %}
6707
6708 %}
6709
6710 //----------INSTRUCTIONS-------------------------------------------------------
6711 //
6712 // match -- States which machine-independent subtree may be replaced
6713 // by this instruction.
6714 // ins_cost -- The estimated cost of this instruction is used by instruction
6715 // selection to identify a minimum cost tree of machine
6716 // instructions that matches a tree of machine-independent
6717 // instructions.
6718 // format -- A string providing the disassembly for this instruction.
6719 // The value of an instruction's operand may be inserted
6720 // by referring to it with a '$' prefix.
6721 // opcode -- Three instruction opcodes may be provided. These are referred
6722 // to within an encode class as $primary, $secondary, and $tertiary
6723 // respectively. The primary opcode is commonly used to
6724 // indicate the type of machine instruction, while secondary
6725 // and tertiary are often used for prefix options or addressing
6726 // modes.
6727 // ins_encode -- A list of encode classes with parameters. The encode class
6728 // name must have been defined in an 'enc_class' specification
6729 // in the encode section of the architecture description.
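//
// As a minimal illustrative sketch (not an additional definition; the real
// entries follow below), an instruction combining these pieces might look
// like the following, modeled on the integer-add pattern in this file:
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));      // replaces an AddI ideal subtree
//     effect(KILL cr);                    // addl clobbers the condition flags
//     ins_cost(150);                      // estimated cost for selection
//     format %{ "addl    $dst, $src" %}  // disassembly string
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);             // pipeline class used for scheduling
//   %}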
6730
6731 // ============================================================================
6732
6733 instruct ShouldNotReachHere() %{
6734 match(Halt);
6735 format %{ "stop\t# ShouldNotReachHere" %}
6736 ins_encode %{
6737 if (is_reachable()) {
6738 const char* str = __ code_string(_halt_reason);
6739 __ stop(str);
6740 }
6741 %}
6742 ins_pipe(pipe_slow);
6743 %}
6744
6745 // ============================================================================
6746
6747 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6748 // Load Float
6749 instruct MoveF2VL(vlRegF dst, regF src) %{
6750 match(Set dst src);
6751 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6752 ins_encode %{
6753 ShouldNotReachHere();
6754 %}
6755 ins_pipe( fpu_reg_reg );
6756 %}
6757
6758 // Load Float
6759 instruct MoveF2LEG(legRegF dst, regF src) %{
6760 match(Set dst src);
6761 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6762 ins_encode %{
6763 ShouldNotReachHere();
6764 %}
6765 ins_pipe( fpu_reg_reg );
6766 %}
6767
6768 // Load Float
6769 instruct MoveVL2F(regF dst, vlRegF src) %{
6770 match(Set dst src);
6771 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6772 ins_encode %{
6773 ShouldNotReachHere();
6774 %}
6775 ins_pipe( fpu_reg_reg );
6776 %}
6777
6778 // Load Float
6779 instruct MoveLEG2F(regF dst, legRegF src) %{
6780 match(Set dst src);
6781 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6782 ins_encode %{
6783 ShouldNotReachHere();
6784 %}
6785 ins_pipe( fpu_reg_reg );
6786 %}
6787
6788 // Load Double
6789 instruct MoveD2VL(vlRegD dst, regD src) %{
6790 match(Set dst src);
6791 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6792 ins_encode %{
6793 ShouldNotReachHere();
6794 %}
6795 ins_pipe( fpu_reg_reg );
6796 %}
6797
6798 // Load Double
6799 instruct MoveD2LEG(legRegD dst, regD src) %{
6800 match(Set dst src);
6801 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6802 ins_encode %{
6803 ShouldNotReachHere();
6804 %}
6805 ins_pipe( fpu_reg_reg );
6806 %}
6807
6808 // Load Double
6809 instruct MoveVL2D(regD dst, vlRegD src) %{
6810 match(Set dst src);
6811 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6812 ins_encode %{
6813 ShouldNotReachHere();
6814 %}
6815 ins_pipe( fpu_reg_reg );
6816 %}
6817
6818 // Load Double
6819 instruct MoveLEG2D(regD dst, legRegD src) %{
6820 match(Set dst src);
6821 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6822 ins_encode %{
6823 ShouldNotReachHere();
6824 %}
6825 ins_pipe( fpu_reg_reg );
6826 %}
6827
6828 //----------Load/Store/Move Instructions---------------------------------------
6829 //----------Load Instructions--------------------------------------------------
6830
6831 // Load Byte (8 bit signed)
6832 instruct loadB(rRegI dst, memory mem)
6833 %{
6834 match(Set dst (LoadB mem));
6835
6836 ins_cost(125);
6837 format %{ "movsbl $dst, $mem\t# byte" %}
6838
6839 ins_encode %{
6840 __ movsbl($dst$$Register, $mem$$Address);
6841 %}
6842
6843 ins_pipe(ialu_reg_mem);
6844 %}
6845
6846 // Load Byte (8 bit signed) into Long Register
6847 instruct loadB2L(rRegL dst, memory mem)
6848 %{
6849 match(Set dst (ConvI2L (LoadB mem)));
6850
6851 ins_cost(125);
6852 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6853
6854 ins_encode %{
6855 __ movsbq($dst$$Register, $mem$$Address);
6856 %}
6857
6858 ins_pipe(ialu_reg_mem);
6859 %}
6860
6861 // Load Unsigned Byte (8 bit UNsigned)
6862 instruct loadUB(rRegI dst, memory mem)
6863 %{
6864 match(Set dst (LoadUB mem));
6865
6866 ins_cost(125);
6867 format %{ "movzbl $dst, $mem\t# ubyte" %}
6868
6869 ins_encode %{
6870 __ movzbl($dst$$Register, $mem$$Address);
6871 %}
6872
6873 ins_pipe(ialu_reg_mem);
6874 %}
6875
6876 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6877 instruct loadUB2L(rRegL dst, memory mem)
6878 %{
6879 match(Set dst (ConvI2L (LoadUB mem)));
6880
6881 ins_cost(125);
6882 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6883
6884 ins_encode %{
6885 __ movzbq($dst$$Register, $mem$$Address);
6886 %}
6887
6888 ins_pipe(ialu_reg_mem);
6889 %}
6890
6891 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6892 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6893 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6894 effect(KILL cr);
6895
6896 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6897 "andl $dst, right_n_bits($mask, 8)" %}
6898 ins_encode %{
6899 Register Rdst = $dst$$Register;
6900 __ movzbq(Rdst, $mem$$Address);
6901 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6902 %}
6903 ins_pipe(ialu_reg_mem);
6904 %}
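// Worked example for the mask truncation above: with $mask == 0x1FF the
// emitted sequence is
//   movzbq dst, mem    ; dst = zero-extended byte, upper 56 bits are zero
//   andl   dst, 0xFF   ; 0x1FF & right_n_bits(8) == 0xFF
// Mask bits above bit 7 cannot affect a zero-extended byte, so truncating
// the mask to its low 8 bits preserves the semantics of the matched subtree.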
6905
6906 // Load Short (16 bit signed)
6907 instruct loadS(rRegI dst, memory mem)
6908 %{
6909 match(Set dst (LoadS mem));
6910
6911 ins_cost(125);
6912 format %{ "movswl $dst, $mem\t# short" %}
6913
6914 ins_encode %{
6915 __ movswl($dst$$Register, $mem$$Address);
6916 %}
6917
6918 ins_pipe(ialu_reg_mem);
6919 %}
6920
6921 // Load Short (16 bit signed) to Byte (8 bit signed)
6922 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6923 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6924
6925 ins_cost(125);
6926 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6927 ins_encode %{
6928 __ movsbl($dst$$Register, $mem$$Address);
6929 %}
6930 ins_pipe(ialu_reg_mem);
6931 %}
6932
6933 // Load Short (16 bit signed) into Long Register
6934 instruct loadS2L(rRegL dst, memory mem)
6935 %{
6936 match(Set dst (ConvI2L (LoadS mem)));
6937
6938 ins_cost(125);
6939 format %{ "movswq $dst, $mem\t# short -> long" %}
6940
6941 ins_encode %{
6942 __ movswq($dst$$Register, $mem$$Address);
6943 %}
6944
6945 ins_pipe(ialu_reg_mem);
6946 %}
6947
6948 // Load Unsigned Short/Char (16 bit UNsigned)
6949 instruct loadUS(rRegI dst, memory mem)
6950 %{
6951 match(Set dst (LoadUS mem));
6952
6953 ins_cost(125);
6954 format %{ "movzwl $dst, $mem\t# ushort/char" %}
6955
6956 ins_encode %{
6957 __ movzwl($dst$$Register, $mem$$Address);
6958 %}
6959
6960 ins_pipe(ialu_reg_mem);
6961 %}
6962
6963 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
6964 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6965 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6966
6967 ins_cost(125);
6968 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
6969 ins_encode %{
6970 __ movsbl($dst$$Register, $mem$$Address);
6971 %}
6972 ins_pipe(ialu_reg_mem);
6973 %}
6974
6975 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
6976 instruct loadUS2L(rRegL dst, memory mem)
6977 %{
6978 match(Set dst (ConvI2L (LoadUS mem)));
6979
6980 ins_cost(125);
6981 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
6982
6983 ins_encode %{
6984 __ movzwq($dst$$Register, $mem$$Address);
6985 %}
6986
6987 ins_pipe(ialu_reg_mem);
6988 %}
6989
6990 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
6991 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
6992 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6993
6994 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
6995 ins_encode %{
6996 __ movzbq($dst$$Register, $mem$$Address);
6997 %}
6998 ins_pipe(ialu_reg_mem);
6999 %}
7000
7001 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7002 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7003 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7004 effect(KILL cr);
7005
7006 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7007 "andl $dst, right_n_bits($mask, 16)" %}
7008 ins_encode %{
7009 Register Rdst = $dst$$Register;
7010 __ movzwq(Rdst, $mem$$Address);
7011 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7012 %}
7013 ins_pipe(ialu_reg_mem);
7014 %}
7015
7016 // Load Integer
7017 instruct loadI(rRegI dst, memory mem)
7018 %{
7019 match(Set dst (LoadI mem));
7020
7021 ins_cost(125);
7022 format %{ "movl $dst, $mem\t# int" %}
7023
7024 ins_encode %{
7025 __ movl($dst$$Register, $mem$$Address);
7026 %}
7027
7028 ins_pipe(ialu_reg_mem);
7029 %}
7030
7031 // Load Integer (32 bit signed) to Byte (8 bit signed)
7032 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7033 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7034
7035 ins_cost(125);
7036 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7037 ins_encode %{
7038 __ movsbl($dst$$Register, $mem$$Address);
7039 %}
7040 ins_pipe(ialu_reg_mem);
7041 %}
7042
7043 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7044 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7045 match(Set dst (AndI (LoadI mem) mask));
7046
7047 ins_cost(125);
7048 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7049 ins_encode %{
7050 __ movzbl($dst$$Register, $mem$$Address);
7051 %}
7052 ins_pipe(ialu_reg_mem);
7053 %}
7054
7055 // Load Integer (32 bit signed) to Short (16 bit signed)
7056 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7057 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7058
7059 ins_cost(125);
7060 format %{ "movswl $dst, $mem\t# int -> short" %}
7061 ins_encode %{
7062 __ movswl($dst$$Register, $mem$$Address);
7063 %}
7064 ins_pipe(ialu_reg_mem);
7065 %}
7066
7067 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7068 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7069 match(Set dst (AndI (LoadI mem) mask));
7070
7071 ins_cost(125);
7072 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7073 ins_encode %{
7074 __ movzwl($dst$$Register, $mem$$Address);
7075 %}
7076 ins_pipe(ialu_reg_mem);
7077 %}
7078
7079 // Load Integer into Long Register
7080 instruct loadI2L(rRegL dst, memory mem)
7081 %{
7082 match(Set dst (ConvI2L (LoadI mem)));
7083
7084 ins_cost(125);
7085 format %{ "movslq $dst, $mem\t# int -> long" %}
7086
7087 ins_encode %{
7088 __ movslq($dst$$Register, $mem$$Address);
7089 %}
7090
7091 ins_pipe(ialu_reg_mem);
7092 %}
7093
7094 // Load Integer with mask 0xFF into Long Register
7095 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7096 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7097
7098 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7099 ins_encode %{
7100 __ movzbq($dst$$Register, $mem$$Address);
7101 %}
7102 ins_pipe(ialu_reg_mem);
7103 %}
7104
7105 // Load Integer with mask 0xFFFF into Long Register
7106 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7107 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7108
7109 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7110 ins_encode %{
7111 __ movzwq($dst$$Register, $mem$$Address);
7112 %}
7113 ins_pipe(ialu_reg_mem);
7114 %}
7115
7116 // Load Integer with a 31-bit mask into Long Register
7117 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7118 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7119 effect(KILL cr);
7120
7121 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7122 "andl $dst, $mask" %}
7123 ins_encode %{
7124 Register Rdst = $dst$$Register;
7125 __ movl(Rdst, $mem$$Address);
7126 __ andl(Rdst, $mask$$constant);
7127 %}
7128 ins_pipe(ialu_reg_mem);
7129 %}
7130
7131 // Load Unsigned Integer into Long Register
7132 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7133 %{
7134 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7135
7136 ins_cost(125);
7137 format %{ "movl $dst, $mem\t# uint -> long" %}
7138
7139 ins_encode %{
7140 __ movl($dst$$Register, $mem$$Address);
7141 %}
7142
7143 ins_pipe(ialu_reg_mem);
7144 %}
7145
7146 // Load Long
7147 instruct loadL(rRegL dst, memory mem)
7148 %{
7149 match(Set dst (LoadL mem));
7150
7151 ins_cost(125);
7152 format %{ "movq $dst, $mem\t# long" %}
7153
7154 ins_encode %{
7155 __ movq($dst$$Register, $mem$$Address);
7156 %}
7157
7158 ins_pipe(ialu_reg_mem); // XXX
7159 %}
7160
7161 // Load Range
7162 instruct loadRange(rRegI dst, memory mem)
7163 %{
7164 match(Set dst (LoadRange mem));
7165
7166 ins_cost(125); // XXX
7167 format %{ "movl $dst, $mem\t# range" %}
7168 ins_encode %{
7169 __ movl($dst$$Register, $mem$$Address);
7170 %}
7171 ins_pipe(ialu_reg_mem);
7172 %}
7173
7174 // Load Pointer
7175 instruct loadP(rRegP dst, memory mem)
7176 %{
7177 match(Set dst (LoadP mem));
7178 predicate(n->as_Load()->barrier_data() == 0);
7179
7180 ins_cost(125); // XXX
7181 format %{ "movq $dst, $mem\t# ptr" %}
7182 ins_encode %{
7183 __ movq($dst$$Register, $mem$$Address);
7184 %}
7185 ins_pipe(ialu_reg_mem); // XXX
7186 %}
7187
7188 // Load Compressed Pointer
7189 instruct loadN(rRegN dst, memory mem)
7190 %{
7191 predicate(n->as_Load()->barrier_data() == 0);
7192 match(Set dst (LoadN mem));
7193
7194 ins_cost(125); // XXX
7195 format %{ "movl $dst, $mem\t# compressed ptr" %}
7196 ins_encode %{
7197 __ movl($dst$$Register, $mem$$Address);
7198 %}
7199 ins_pipe(ialu_reg_mem); // XXX
7200 %}
7201
7202
7203 // Load Klass Pointer
7204 instruct loadKlass(rRegP dst, memory mem)
7205 %{
7206 match(Set dst (LoadKlass mem));
7207
7208 ins_cost(125); // XXX
7209 format %{ "movq $dst, $mem\t# class" %}
7210 ins_encode %{
7211 __ movq($dst$$Register, $mem$$Address);
7212 %}
7213 ins_pipe(ialu_reg_mem); // XXX
7214 %}
7215
7216 // Load narrow Klass Pointer
7217 instruct loadNKlass(rRegN dst, memory mem)
7218 %{
7219 predicate(!UseCompactObjectHeaders);
7220 match(Set dst (LoadNKlass mem));
7221
7222 ins_cost(125); // XXX
7223 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7224 ins_encode %{
7225 __ movl($dst$$Register, $mem$$Address);
7226 %}
7227 ins_pipe(ialu_reg_mem); // XXX
7228 %}
7229
7230 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7231 %{
7232 predicate(UseCompactObjectHeaders);
7233 match(Set dst (LoadNKlass mem));
7234 effect(KILL cr);
7235 ins_cost(125);
7236 format %{
7237 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7238 "shrl $dst, markWord::klass_shift_at_offset"
7239 %}
7240 ins_encode %{
7241 if (UseAPX) {
7242 __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
7243 } else {
7245 __ movl($dst$$Register, $mem$$Address);
7246 __ shrl($dst$$Register, markWord::klass_shift_at_offset);
7247 }
7248 %}
7249 ins_pipe(ialu_reg_mem);
7250 %}
7251
7252 // Load Float
7253 instruct loadF(regF dst, memory mem)
7254 %{
7255 match(Set dst (LoadF mem));
7256
7257 ins_cost(145); // XXX
7258 format %{ "movss $dst, $mem\t# float" %}
7259 ins_encode %{
7260 __ movflt($dst$$XMMRegister, $mem$$Address);
7261 %}
7262 ins_pipe(pipe_slow); // XXX
7263 %}
7264
7265 // Load Double
7266 instruct loadD_partial(regD dst, memory mem)
7267 %{
7268 predicate(!UseXmmLoadAndClearUpper);
7269 match(Set dst (LoadD mem));
7270
7271 ins_cost(145); // XXX
7272 format %{ "movlpd $dst, $mem\t# double" %}
7273 ins_encode %{
7274 __ movdbl($dst$$XMMRegister, $mem$$Address);
7275 %}
7276 ins_pipe(pipe_slow); // XXX
7277 %}
7278
7279 instruct loadD(regD dst, memory mem)
7280 %{
7281 predicate(UseXmmLoadAndClearUpper);
7282 match(Set dst (LoadD mem));
7283
7284 ins_cost(145); // XXX
7285 format %{ "movsd $dst, $mem\t# double" %}
7286 ins_encode %{
7287 __ movdbl($dst$$XMMRegister, $mem$$Address);
7288 %}
7289 ins_pipe(pipe_slow); // XXX
7290 %}
7291
7292 // max = java.lang.Math.max(float a, float b)
7293 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7294 predicate(VM_Version::supports_avx10_2());
7295 match(Set dst (MaxF a b));
7296 format %{ "maxF $dst, $a, $b" %}
7297 ins_encode %{
7298 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7299 %}
7300 ins_pipe( pipe_slow );
7301 %}
7302
7303 // max = java.lang.Math.max(float a, float b)
7304 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7305 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7306 match(Set dst (MaxF a b));
7307 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7308 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7309 ins_encode %{
7310 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7311 %}
7312 ins_pipe( pipe_slow );
7313 %}
7314
7315 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7316 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7317 match(Set dst (MaxF a b));
7318 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7319
7320 format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7321 ins_encode %{
7322 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7323 false /*min*/, true /*single*/);
7324 %}
7325 ins_pipe( pipe_slow );
7326 %}
7327
7328 // max = java.lang.Math.max(double a, double b)
7329 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7330 predicate(VM_Version::supports_avx10_2());
7331 match(Set dst (MaxD a b));
7332 format %{ "maxD $dst, $a, $b" %}
7333 ins_encode %{
7334 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7335 %}
7336 ins_pipe( pipe_slow );
7337 %}
7338
7339 // max = java.lang.Math.max(double a, double b)
7340 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7341 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7342 match(Set dst (MaxD a b));
7343 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7344 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7345 ins_encode %{
7346 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7347 %}
7348 ins_pipe( pipe_slow );
7349 %}
7350
7351 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7352 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7353 match(Set dst (MaxD a b));
7354 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7355
7356 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7357 ins_encode %{
7358 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7359 false /*min*/, false /*single*/);
7360 %}
7361 ins_pipe( pipe_slow );
7362 %}
7363
7364 // min = java.lang.Math.min(float a, float b)
7365 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7366 predicate(VM_Version::supports_avx10_2());
7367 match(Set dst (MinF a b));
7368 format %{ "minF $dst, $a, $b" %}
7369 ins_encode %{
7370 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7371 %}
7372 ins_pipe( pipe_slow );
7373 %}
7374
7375 // min = java.lang.Math.min(float a, float b)
7376 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7377 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7378 match(Set dst (MinF a b));
7379 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7380 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7381 ins_encode %{
7382 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7383 %}
7384 ins_pipe( pipe_slow );
7385 %}
7386
7387 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7388 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7389 match(Set dst (MinF a b));
7390 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7391
7392 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7393 ins_encode %{
7394 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7395 true /*min*/, true /*single*/);
7396 %}
7397 ins_pipe( pipe_slow );
7398 %}
7399
7400 // min = java.lang.Math.min(double a, double b)
7401 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7402 predicate(VM_Version::supports_avx10_2());
7403 match(Set dst (MinD a b));
7404 format %{ "minD $dst, $a, $b" %}
7405 ins_encode %{
7406 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7407 %}
7408 ins_pipe( pipe_slow );
7409 %}
7410
7411 // min = java.lang.Math.min(double a, double b)
7412 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7413 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7414 match(Set dst (MinD a b));
7415 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7416 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7417 ins_encode %{
7418 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7419 %}
7420 ins_pipe( pipe_slow );
7421 %}
7422
7423 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7424 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7425 match(Set dst (MinD a b));
7426 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7427
7428 format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7429 ins_encode %{
7430 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7431 true /*min*/, false /*single*/);
7432 %}
7433 ins_pipe( pipe_slow );
7434 %}
7435
7436 // Load Effective Address
7437 instruct leaP8(rRegP dst, indOffset8 mem)
7438 %{
7439 match(Set dst mem);
7440
7441 ins_cost(110); // XXX
7442 format %{ "leaq $dst, $mem\t# ptr 8" %}
7443 ins_encode %{
7444 __ leaq($dst$$Register, $mem$$Address);
7445 %}
7446 ins_pipe(ialu_reg_reg_fat);
7447 %}
7448
7449 instruct leaP32(rRegP dst, indOffset32 mem)
7450 %{
7451 match(Set dst mem);
7452
7453 ins_cost(110);
7454 format %{ "leaq $dst, $mem\t# ptr 32" %}
7455 ins_encode %{
7456 __ leaq($dst$$Register, $mem$$Address);
7457 %}
7458 ins_pipe(ialu_reg_reg_fat);
7459 %}
7460
7461 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7462 %{
7463 match(Set dst mem);
7464
7465 ins_cost(110);
7466 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7467 ins_encode %{
7468 __ leaq($dst$$Register, $mem$$Address);
7469 %}
7470 ins_pipe(ialu_reg_reg_fat);
7471 %}
7472
7473 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7474 %{
7475 match(Set dst mem);
7476
7477 ins_cost(110);
7478 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7479 ins_encode %{
7480 __ leaq($dst$$Register, $mem$$Address);
7481 %}
7482 ins_pipe(ialu_reg_reg_fat);
7483 %}
7484
7485 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7486 %{
7487 match(Set dst mem);
7488
7489 ins_cost(110);
7490 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7491 ins_encode %{
7492 __ leaq($dst$$Register, $mem$$Address);
7493 %}
7494 ins_pipe(ialu_reg_reg_fat);
7495 %}
7496
7497 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7498 %{
7499 match(Set dst mem);
7500
7501 ins_cost(110);
7502 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7503 ins_encode %{
7504 __ leaq($dst$$Register, $mem$$Address);
7505 %}
7506 ins_pipe(ialu_reg_reg_fat);
7507 %}
7508
7509 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7510 %{
7511 match(Set dst mem);
7512
7513 ins_cost(110);
7514 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7515 ins_encode %{
7516 __ leaq($dst$$Register, $mem$$Address);
7517 %}
7518 ins_pipe(ialu_reg_reg_fat);
7519 %}
7520
7521 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7522 %{
7523 match(Set dst mem);
7524
7525 ins_cost(110);
7526 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7527 ins_encode %{
7528 __ leaq($dst$$Register, $mem$$Address);
7529 %}
7530 ins_pipe(ialu_reg_reg_fat);
7531 %}
7532
7533 // Load Effective Address which uses a narrow (32-bit) oop
7534 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7535 %{
7536 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7537 match(Set dst mem);
7538
7539 ins_cost(110);
7540 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7541 ins_encode %{
7542 __ leaq($dst$$Register, $mem$$Address);
7543 %}
7544 ins_pipe(ialu_reg_reg_fat);
7545 %}
7546
7547 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7548 %{
7549 predicate(CompressedOops::shift() == 0);
7550 match(Set dst mem);
7551
7552 ins_cost(110); // XXX
7553 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7554 ins_encode %{
7555 __ leaq($dst$$Register, $mem$$Address);
7556 %}
7557 ins_pipe(ialu_reg_reg_fat);
7558 %}
7559
7560 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7561 %{
7562 predicate(CompressedOops::shift() == 0);
7563 match(Set dst mem);
7564
7565 ins_cost(110);
7566 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7567 ins_encode %{
7568 __ leaq($dst$$Register, $mem$$Address);
7569 %}
7570 ins_pipe(ialu_reg_reg_fat);
7571 %}
7572
7573 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7574 %{
7575 predicate(CompressedOops::shift() == 0);
7576 match(Set dst mem);
7577
7578 ins_cost(110);
7579 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7580 ins_encode %{
7581 __ leaq($dst$$Register, $mem$$Address);
7582 %}
7583 ins_pipe(ialu_reg_reg_fat);
7584 %}
7585
7586 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7587 %{
7588 predicate(CompressedOops::shift() == 0);
7589 match(Set dst mem);
7590
7591 ins_cost(110);
7592 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7593 ins_encode %{
7594 __ leaq($dst$$Register, $mem$$Address);
7595 %}
7596 ins_pipe(ialu_reg_reg_fat);
7597 %}
7598
7599 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7600 %{
7601 predicate(CompressedOops::shift() == 0);
7602 match(Set dst mem);
7603
7604 ins_cost(110);
7605 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7606 ins_encode %{
7607 __ leaq($dst$$Register, $mem$$Address);
7608 %}
7609 ins_pipe(ialu_reg_reg_fat);
7610 %}
7611
7612 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7613 %{
7614 predicate(CompressedOops::shift() == 0);
7615 match(Set dst mem);
7616
7617 ins_cost(110);
7618 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7619 ins_encode %{
7620 __ leaq($dst$$Register, $mem$$Address);
7621 %}
7622 ins_pipe(ialu_reg_reg_fat);
7623 %}
7624
7625 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7626 %{
7627 predicate(CompressedOops::shift() == 0);
7628 match(Set dst mem);
7629
7630 ins_cost(110);
7631 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7632 ins_encode %{
7633 __ leaq($dst$$Register, $mem$$Address);
7634 %}
7635 ins_pipe(ialu_reg_reg_fat);
7636 %}
7637
7638 instruct loadConI(rRegI dst, immI src)
7639 %{
7640 match(Set dst src);
7641
7642 format %{ "movl $dst, $src\t# int" %}
7643 ins_encode %{
7644 __ movl($dst$$Register, $src$$constant);
7645 %}
7646 ins_pipe(ialu_reg_fat); // XXX
7647 %}
7648
7649 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7650 %{
7651 match(Set dst src);
7652 effect(KILL cr);
7653
7654 ins_cost(50);
7655 format %{ "xorl $dst, $dst\t# int" %}
7656 ins_encode %{
7657 __ xorl($dst$$Register, $dst$$Register);
7658 %}
7659 ins_pipe(ialu_reg);
7660 %}
7661
7662 instruct loadConL(rRegL dst, immL src)
7663 %{
7664 match(Set dst src);
7665
7666 ins_cost(150);
7667 format %{ "movq $dst, $src\t# long" %}
7668 ins_encode %{
7669 __ mov64($dst$$Register, $src$$constant);
7670 %}
7671 ins_pipe(ialu_reg);
7672 %}
7673
7674 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7675 %{
7676 match(Set dst src);
7677 effect(KILL cr);
7678
7679 ins_cost(50);
7680 format %{ "xorl $dst, $dst\t# long" %}
7681 ins_encode %{
7682 __ xorl($dst$$Register, $dst$$Register);
7683 %}
7684 ins_pipe(ialu_reg); // XXX
7685 %}
7686
7687 instruct loadConUL32(rRegL dst, immUL32 src)
7688 %{
7689 match(Set dst src);
7690
7691 ins_cost(60);
7692 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7693 ins_encode %{
7694 __ movl($dst$$Register, $src$$constant);
7695 %}
7696 ins_pipe(ialu_reg);
7697 %}
7698
7699 instruct loadConL32(rRegL dst, immL32 src)
7700 %{
7701 match(Set dst src);
7702
7703 ins_cost(70);
7704 format %{ "movq $dst, $src\t# long (32-bit)" %}
7705 ins_encode %{
7706 __ movq($dst$$Register, $src$$constant);
7707 %}
7708 ins_pipe(ialu_reg);
7709 %}
7710
7711 instruct loadConP(rRegP dst, immP con) %{
7712 match(Set dst con);
7713
7714 format %{ "movq $dst, $con\t# ptr" %}
7715 ins_encode %{
7716 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7717 %}
7718 ins_pipe(ialu_reg_fat); // XXX
7719 %}
7720
7721 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7722 %{
7723 match(Set dst src);
7724 effect(KILL cr);
7725
7726 ins_cost(50);
7727 format %{ "xorl $dst, $dst\t# ptr" %}
7728 ins_encode %{
7729 __ xorl($dst$$Register, $dst$$Register);
7730 %}
7731 ins_pipe(ialu_reg);
7732 %}
7733
7734 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7735 %{
7736 match(Set dst src);
7737 effect(KILL cr);
7738
7739 ins_cost(60);
7740 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7741 ins_encode %{
7742 __ movl($dst$$Register, $src$$constant);
7743 %}
7744 ins_pipe(ialu_reg);
7745 %}
7746
7747 instruct loadConF(regF dst, immF con) %{
7748 match(Set dst con);
7749 ins_cost(125);
7750 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7751 ins_encode %{
7752 __ movflt($dst$$XMMRegister, $constantaddress($con));
7753 %}
7754 ins_pipe(pipe_slow);
7755 %}
7756
7757 instruct loadConH(regF dst, immH con) %{
7758 match(Set dst con);
7759 ins_cost(125);
7760 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7761 ins_encode %{
7762 __ movflt($dst$$XMMRegister, $constantaddress($con));
7763 %}
7764 ins_pipe(pipe_slow);
7765 %}
7766
7767 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7768 match(Set dst src);
7769 effect(KILL cr);
7770 format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7771 ins_encode %{
7772 __ xorq($dst$$Register, $dst$$Register);
7773 %}
7774 ins_pipe(ialu_reg);
7775 %}
7776
7777 instruct loadConN(rRegN dst, immN src) %{
7778 match(Set dst src);
7779
7780 ins_cost(125);
7781 format %{ "movl $dst, $src\t# compressed ptr" %}
7782 ins_encode %{
7783 address con = (address)$src$$constant;
7784 if (con == nullptr) {
7785 ShouldNotReachHere();
7786 } else {
7787 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7788 }
7789 %}
7790 ins_pipe(ialu_reg_fat); // XXX
7791 %}
7792
7793 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7794 match(Set dst src);
7795
7796 ins_cost(125);
7797 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7798 ins_encode %{
7799 address con = (address)$src$$constant;
7800 if (con == nullptr) {
7801 ShouldNotReachHere();
7802 } else {
7803 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7804 }
7805 %}
7806 ins_pipe(ialu_reg_fat); // XXX
7807 %}
7808
7809 instruct loadConF0(regF dst, immF0 src)
7810 %{
7811 match(Set dst src);
7812 ins_cost(100);
7813
7814 format %{ "xorps $dst, $dst\t# float 0.0" %}
7815 ins_encode %{
7816 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7817 %}
7818 ins_pipe(pipe_slow);
7819 %}
7820
7821 // Use the same format since predicate() cannot be used here.
7822 instruct loadConD(regD dst, immD con) %{
7823 match(Set dst con);
7824 ins_cost(125);
7825 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7826 ins_encode %{
7827 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7828 %}
7829 ins_pipe(pipe_slow);
7830 %}
7831
7832 instruct loadConD0(regD dst, immD0 src)
7833 %{
7834 match(Set dst src);
7835 ins_cost(100);
7836
7837 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7838 ins_encode %{
7839 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7840 %}
7841 ins_pipe(pipe_slow);
7842 %}
7843
7844 instruct loadSSI(rRegI dst, stackSlotI src)
7845 %{
7846 match(Set dst src);
7847
7848 ins_cost(125);
7849 format %{ "movl $dst, $src\t# int stk" %}
7850 ins_encode %{
7851 __ movl($dst$$Register, $src$$Address);
7852 %}
7853 ins_pipe(ialu_reg_mem);
7854 %}
7855
7856 instruct loadSSL(rRegL dst, stackSlotL src)
7857 %{
7858 match(Set dst src);
7859
7860 ins_cost(125);
7861 format %{ "movq $dst, $src\t# long stk" %}
7862 ins_encode %{
7863 __ movq($dst$$Register, $src$$Address);
7864 %}
7865 ins_pipe(ialu_reg_mem);
7866 %}
7867
7868 instruct loadSSP(rRegP dst, stackSlotP src)
7869 %{
7870 match(Set dst src);
7871
7872 ins_cost(125);
7873 format %{ "movq $dst, $src\t# ptr stk" %}
7874 ins_encode %{
7875 __ movq($dst$$Register, $src$$Address);
7876 %}
7877 ins_pipe(ialu_reg_mem);
7878 %}
7879
7880 instruct loadSSF(regF dst, stackSlotF src)
7881 %{
7882 match(Set dst src);
7883
7884 ins_cost(125);
7885 format %{ "movss $dst, $src\t# float stk" %}
7886 ins_encode %{
7887 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7888 %}
7889 ins_pipe(pipe_slow); // XXX
7890 %}
7891
7892 // Use the same format since predicate() cannot be used here.
7893 instruct loadSSD(regD dst, stackSlotD src)
7894 %{
7895 match(Set dst src);
7896
7897 ins_cost(125);
7898 format %{ "movsd $dst, $src\t# double stk" %}
7899 ins_encode %{
7900 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7901 %}
7902 ins_pipe(pipe_slow); // XXX
7903 %}
7904
7905 // Prefetch instructions for allocation.
7906 // Must be safe to execute with an invalid address (they cannot fault).
7907
7908 instruct prefetchAlloc( memory mem ) %{
7909 predicate(AllocatePrefetchInstr==3);
7910 match(PrefetchAllocation mem);
7911 ins_cost(125);
7912
7913 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7914 ins_encode %{
7915 __ prefetchw($mem$$Address);
7916 %}
7917 ins_pipe(ialu_mem);
7918 %}
7919
7920 instruct prefetchAllocNTA( memory mem ) %{
7921 predicate(AllocatePrefetchInstr==0);
7922 match(PrefetchAllocation mem);
7923 ins_cost(125);
7924
7925 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7926 ins_encode %{
7927 __ prefetchnta($mem$$Address);
7928 %}
7929 ins_pipe(ialu_mem);
7930 %}
7931
7932 instruct prefetchAllocT0( memory mem ) %{
7933 predicate(AllocatePrefetchInstr==1);
7934 match(PrefetchAllocation mem);
7935 ins_cost(125);
7936
7937 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
7938 ins_encode %{
7939 __ prefetcht0($mem$$Address);
7940 %}
7941 ins_pipe(ialu_mem);
7942 %}
7943
7944 instruct prefetchAllocT2( memory mem ) %{
7945 predicate(AllocatePrefetchInstr==2);
7946 match(PrefetchAllocation mem);
7947 ins_cost(125);
7948
7949 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
7950 ins_encode %{
7951 __ prefetcht2($mem$$Address);
7952 %}
7953 ins_pipe(ialu_mem);
7954 %}
7955
7956 //----------Store Instructions-------------------------------------------------
7957
7958 // Store Byte
7959 instruct storeB(memory mem, rRegI src)
7960 %{
7961 match(Set mem (StoreB mem src));
7962
7963 ins_cost(125); // XXX
7964 format %{ "movb $mem, $src\t# byte" %}
7965 ins_encode %{
7966 __ movb($mem$$Address, $src$$Register);
7967 %}
7968 ins_pipe(ialu_mem_reg);
7969 %}
7970
7971 // Store Char/Short
7972 instruct storeC(memory mem, rRegI src)
7973 %{
7974 match(Set mem (StoreC mem src));
7975
7976 ins_cost(125); // XXX
7977 format %{ "movw $mem, $src\t# char/short" %}
7978 ins_encode %{
7979 __ movw($mem$$Address, $src$$Register);
7980 %}
7981 ins_pipe(ialu_mem_reg);
7982 %}
7983
7984 // Store Integer
7985 instruct storeI(memory mem, rRegI src)
7986 %{
7987 match(Set mem (StoreI mem src));
7988
7989 ins_cost(125); // XXX
7990 format %{ "movl $mem, $src\t# int" %}
7991 ins_encode %{
7992 __ movl($mem$$Address, $src$$Register);
7993 %}
7994 ins_pipe(ialu_mem_reg);
7995 %}
7996
7997 // Store Long
7998 instruct storeL(memory mem, rRegL src)
7999 %{
8000 match(Set mem (StoreL mem src));
8001
8002 ins_cost(125); // XXX
8003 format %{ "movq $mem, $src\t# long" %}
8004 ins_encode %{
8005 __ movq($mem$$Address, $src$$Register);
8006 %}
8007 ins_pipe(ialu_mem_reg); // XXX
8008 %}
8009
8010 // Store Pointer
8011 instruct storeP(memory mem, any_RegP src)
8012 %{
8013 predicate(n->as_Store()->barrier_data() == 0);
8014 match(Set mem (StoreP mem src));
8015
8016 ins_cost(125); // XXX
8017 format %{ "movq $mem, $src\t# ptr" %}
8018 ins_encode %{
8019 __ movq($mem$$Address, $src$$Register);
8020 %}
8021 ins_pipe(ialu_mem_reg);
8022 %}
8023
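// The zero-store patterns below rely on the compressed-oops invariant that
// R12 (reserved as R12_heapbase) holds zero whenever CompressedOops::base()
// is null, so storing the register gives a shorter encoding than a
// mov-immediate of zero.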
8024 instruct storeImmP0(memory mem, immP0 zero)
8025 %{
8026 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8027 match(Set mem (StoreP mem zero));
8028
8029 ins_cost(125); // XXX
8030 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8031 ins_encode %{
8032 __ movq($mem$$Address, r12);
8033 %}
8034 ins_pipe(ialu_mem_reg);
8035 %}
8036
8037 // Store Null Pointer, mark word, or other simple pointer constant.
8038 instruct storeImmP(memory mem, immP31 src)
8039 %{
8040 predicate(n->as_Store()->barrier_data() == 0);
8041 match(Set mem (StoreP mem src));
8042
8043 ins_cost(150); // XXX
8044 format %{ "movq $mem, $src\t# ptr" %}
8045 ins_encode %{
8046 __ movq($mem$$Address, $src$$constant);
8047 %}
8048 ins_pipe(ialu_mem_imm);
8049 %}
8050
8051 // Store Compressed Pointer
8052 instruct storeN(memory mem, rRegN src)
8053 %{
8054 predicate(n->as_Store()->barrier_data() == 0);
8055 match(Set mem (StoreN mem src));
8056
8057 ins_cost(125); // XXX
8058 format %{ "movl $mem, $src\t# compressed ptr" %}
8059 ins_encode %{
8060 __ movl($mem$$Address, $src$$Register);
8061 %}
8062 ins_pipe(ialu_mem_reg);
8063 %}
8064
8065 instruct storeNKlass(memory mem, rRegN src)
8066 %{
8067 match(Set mem (StoreNKlass mem src));
8068
8069 ins_cost(125); // XXX
8070 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8071 ins_encode %{
8072 __ movl($mem$$Address, $src$$Register);
8073 %}
8074 ins_pipe(ialu_mem_reg);
8075 %}
8076
8077 instruct storeImmN0(memory mem, immN0 zero)
8078 %{
8079 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8080 match(Set mem (StoreN mem zero));
8081
8082 ins_cost(125); // XXX
8083 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8084 ins_encode %{
8085 __ movl($mem$$Address, r12);
8086 %}
8087 ins_pipe(ialu_mem_reg);
8088 %}
8089
8090 instruct storeImmN(memory mem, immN src)
8091 %{
8092 predicate(n->as_Store()->barrier_data() == 0);
8093 match(Set mem (StoreN mem src));
8094
8095 ins_cost(150); // XXX
8096 format %{ "movl $mem, $src\t# compressed ptr" %}
8097 ins_encode %{
8098 address con = (address)$src$$constant;
8099 if (con == nullptr) {
8100 __ movl($mem$$Address, 0);
8101 } else {
8102 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8103 }
8104 %}
8105 ins_pipe(ialu_mem_imm);
8106 %}
8107
8108 instruct storeImmNKlass(memory mem, immNKlass src)
8109 %{
8110 match(Set mem (StoreNKlass mem src));
8111
8112 ins_cost(150); // XXX
8113 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8114 ins_encode %{
8115 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8116 %}
8117 ins_pipe(ialu_mem_imm);
8118 %}
8119
8120 // Store Integer Immediate
8121 instruct storeImmI0(memory mem, immI_0 zero)
8122 %{
8123 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8124 match(Set mem (StoreI mem zero));
8125
8126 ins_cost(125); // XXX
8127 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8128 ins_encode %{
8129 __ movl($mem$$Address, r12);
8130 %}
8131 ins_pipe(ialu_mem_reg);
8132 %}
8133
8134 instruct storeImmI(memory mem, immI src)
8135 %{
8136 match(Set mem (StoreI mem src));
8137
8138 ins_cost(150);
8139 format %{ "movl $mem, $src\t# int" %}
8140 ins_encode %{
8141 __ movl($mem$$Address, $src$$constant);
8142 %}
8143 ins_pipe(ialu_mem_imm);
8144 %}
8145
8146 // Store Long Immediate
8147 instruct storeImmL0(memory mem, immL0 zero)
8148 %{
8149 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8150 match(Set mem (StoreL mem zero));
8151
8152 ins_cost(125); // XXX
8153 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8154 ins_encode %{
8155 __ movq($mem$$Address, r12);
8156 %}
8157 ins_pipe(ialu_mem_reg);
8158 %}
8159
8160 instruct storeImmL(memory mem, immL32 src)
8161 %{
8162 match(Set mem (StoreL mem src));
8163
8164 ins_cost(150);
8165 format %{ "movq $mem, $src\t# long" %}
8166 ins_encode %{
8167 __ movq($mem$$Address, $src$$constant);
8168 %}
8169 ins_pipe(ialu_mem_imm);
8170 %}
8171
8172 // Store Short/Char Immediate
8173 instruct storeImmC0(memory mem, immI_0 zero)
8174 %{
8175 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8176 match(Set mem (StoreC mem zero));
8177
8178 ins_cost(125); // XXX
8179 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8180 ins_encode %{
8181 __ movw($mem$$Address, r12);
8182 %}
8183 ins_pipe(ialu_mem_reg);
8184 %}
8185
8186 instruct storeImmI16(memory mem, immI16 src)
8187 %{
8188 predicate(UseStoreImmI16);
8189 match(Set mem (StoreC mem src));
8190
8191 ins_cost(150);
8192 format %{ "movw $mem, $src\t# short/char" %}
8193 ins_encode %{
8194 __ movw($mem$$Address, $src$$constant);
8195 %}
8196 ins_pipe(ialu_mem_imm);
8197 %}
8198
8199 // Store Byte Immediate
8200 instruct storeImmB0(memory mem, immI_0 zero)
8201 %{
8202 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8203 match(Set mem (StoreB mem zero));
8204
8205 ins_cost(125); // XXX
8206 format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8207 ins_encode %{
8208 __ movb($mem$$Address, r12);
8209 %}
8210 ins_pipe(ialu_mem_reg);
8211 %}
8212
8213 instruct storeImmB(memory mem, immI8 src)
8214 %{
8215 match(Set mem (StoreB mem src));
8216
8217 ins_cost(150); // XXX
8218 format %{ "movb $mem, $src\t# byte" %}
8219 ins_encode %{
8220 __ movb($mem$$Address, $src$$constant);
8221 %}
8222 ins_pipe(ialu_mem_imm);
8223 %}
8224
8225 // Store Float
8226 instruct storeF(memory mem, regF src)
8227 %{
8228 match(Set mem (StoreF mem src));
8229
8230 ins_cost(95); // XXX
8231 format %{ "movss $mem, $src\t# float" %}
8232 ins_encode %{
8233 __ movflt($mem$$Address, $src$$XMMRegister);
8234 %}
8235 ins_pipe(pipe_slow); // XXX
8236 %}
8237
8238 // Store immediate Float value (it is faster than a store from an XMM register)
8239 instruct storeF0(memory mem, immF0 zero)
8240 %{
8241 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8242 match(Set mem (StoreF mem zero));
8243
8244 ins_cost(25); // XXX
8245 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8246 ins_encode %{
8247 __ movl($mem$$Address, r12);
8248 %}
8249 ins_pipe(ialu_mem_reg);
8250 %}
8251
8252 instruct storeF_imm(memory mem, immF src)
8253 %{
8254 match(Set mem (StoreF mem src));
8255
8256 ins_cost(50);
8257 format %{ "movl $mem, $src\t# float" %}
8258 ins_encode %{
8259 __ movl($mem$$Address, jint_cast($src$$constant));
8260 %}
8261 ins_pipe(ialu_mem_imm);
8262 %}
8263
8264 // Store Double
8265 instruct storeD(memory mem, regD src)
8266 %{
8267 match(Set mem (StoreD mem src));
8268
8269 ins_cost(95); // XXX
8270 format %{ "movsd $mem, $src\t# double" %}
8271 ins_encode %{
8272 __ movdbl($mem$$Address, $src$$XMMRegister);
8273 %}
8274 ins_pipe(pipe_slow); // XXX
8275 %}
8276
8277 // Store immediate double 0.0 (it is faster than a store from an XMM register)
8278 instruct storeD0_imm(memory mem, immD0 src)
8279 %{
8280 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8281 match(Set mem (StoreD mem src));
8282
8283 ins_cost(50);
8284 format %{ "movq $mem, $src\t# double 0." %}
8285 ins_encode %{
8286 __ movq($mem$$Address, $src$$constant);
8287 %}
8288 ins_pipe(ialu_mem_imm);
8289 %}
8290
8291 instruct storeD0(memory mem, immD0 zero)
8292 %{
8293 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8294 match(Set mem (StoreD mem zero));
8295
8296 ins_cost(25); // XXX
8297 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8298 ins_encode %{
8299 __ movq($mem$$Address, r12);
8300 %}
8301 ins_pipe(ialu_mem_reg);
8302 %}
8303
8304 instruct storeSSI(stackSlotI dst, rRegI src)
8305 %{
8306 match(Set dst src);
8307
8308 ins_cost(100);
8309 format %{ "movl $dst, $src\t# int stk" %}
8310 ins_encode %{
8311 __ movl($dst$$Address, $src$$Register);
8312 %}
8313 ins_pipe( ialu_mem_reg );
8314 %}
8315
8316 instruct storeSSL(stackSlotL dst, rRegL src)
8317 %{
8318 match(Set dst src);
8319
8320 ins_cost(100);
8321 format %{ "movq $dst, $src\t# long stk" %}
8322 ins_encode %{
8323 __ movq($dst$$Address, $src$$Register);
8324 %}
8325 ins_pipe(ialu_mem_reg);
8326 %}
8327
8328 instruct storeSSP(stackSlotP dst, rRegP src)
8329 %{
8330 match(Set dst src);
8331
8332 ins_cost(100);
8333 format %{ "movq $dst, $src\t# ptr stk" %}
8334 ins_encode %{
8335 __ movq($dst$$Address, $src$$Register);
8336 %}
8337 ins_pipe(ialu_mem_reg);
8338 %}
8339
8340 instruct storeSSF(stackSlotF dst, regF src)
8341 %{
8342 match(Set dst src);
8343
8344 ins_cost(95); // XXX
8345 format %{ "movss $dst, $src\t# float stk" %}
8346 ins_encode %{
8347 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8348 %}
8349 ins_pipe(pipe_slow); // XXX
8350 %}
8351
8352 instruct storeSSD(stackSlotD dst, regD src)
8353 %{
8354 match(Set dst src);
8355
8356 ins_cost(95); // XXX
8357 format %{ "movsd $dst, $src\t# double stk" %}
8358 ins_encode %{
8359 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8360 %}
8361 ins_pipe(pipe_slow); // XXX
8362 %}
8363
8364 instruct cacheWB(indirect addr)
8365 %{
8366 predicate(VM_Version::supports_data_cache_line_flush());
8367 match(CacheWB addr);
8368
8369 ins_cost(100);
8370 format %{ "cache wb $addr" %}
8371 ins_encode %{
8372 assert($addr->index_position() < 0, "should be");
8373 assert($addr$$disp == 0, "should be");
8374 __ cache_wb(Address($addr$$base$$Register, 0));
8375 %}
8376 ins_pipe(pipe_slow); // XXX
8377 %}
8378
8379 instruct cacheWBPreSync()
8380 %{
8381 predicate(VM_Version::supports_data_cache_line_flush());
8382 match(CacheWBPreSync);
8383
8384 ins_cost(100);
8385 format %{ "cache wb presync" %}
8386 ins_encode %{
8387 __ cache_wbsync(true);
8388 %}
8389 ins_pipe(pipe_slow); // XXX
8390 %}
8391
8392 instruct cacheWBPostSync()
8393 %{
8394 predicate(VM_Version::supports_data_cache_line_flush());
8395 match(CacheWBPostSync);
8396
8397 ins_cost(100);
8398 format %{ "cache wb postsync" %}
8399 ins_encode %{
8400 __ cache_wbsync(false);
8401 %}
8402 ins_pipe(pipe_slow); // XXX
8403 %}
8404
8405 //----------BSWAP Instructions-------------------------------------------------
8406 instruct bytes_reverse_int(rRegI dst) %{
8407 match(Set dst (ReverseBytesI dst));
8408
8409 format %{ "bswapl $dst" %}
8410 ins_encode %{
8411 __ bswapl($dst$$Register);
8412 %}
8413 ins_pipe( ialu_reg );
8414 %}
8415
8416 instruct bytes_reverse_long(rRegL dst) %{
8417 match(Set dst (ReverseBytesL dst));
8418
8419 format %{ "bswapq $dst" %}
8420 ins_encode %{
8421 __ bswapq($dst$$Register);
8422 %}
8423 ins_pipe( ialu_reg);
8424 %}
8425
8426 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8427 match(Set dst (ReverseBytesUS dst));
8428 effect(KILL cr);
8429
8430 format %{ "bswapl $dst\n\t"
8431 "shrl $dst,16\n\t" %}
8432 ins_encode %{
8433 __ bswapl($dst$$Register);
8434 __ shrl($dst$$Register, 16);
8435 %}
8436 ins_pipe( ialu_reg );
8437 %}
8438
8439 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8440 match(Set dst (ReverseBytesS dst));
8441 effect(KILL cr);
8442
8443 format %{ "bswapl $dst\n\t"
8444 "sar $dst,16\n\t" %}
8445 ins_encode %{
8446 __ bswapl($dst$$Register);
8447 __ sarl($dst$$Register, 16);
8448 %}
8449 ins_pipe( ialu_reg );
8450 %}
8451
8452 //---------- Zeros Count Instructions ------------------------------------------
8453
8454 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8455 predicate(UseCountLeadingZerosInstruction);
8456 match(Set dst (CountLeadingZerosI src));
8457 effect(KILL cr);
8458
8459 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8460 ins_encode %{
8461 __ lzcntl($dst$$Register, $src$$Register);
8462 %}
8463 ins_pipe(ialu_reg);
8464 %}
8465
8466 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8467 predicate(UseCountLeadingZerosInstruction);
8468 match(Set dst (CountLeadingZerosI (LoadI src)));
8469 effect(KILL cr);
8470 ins_cost(175);
8471 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8472 ins_encode %{
8473 __ lzcntl($dst$$Register, $src$$Address);
8474 %}
8475 ins_pipe(ialu_reg_mem);
8476 %}
8477
8478 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8479 predicate(!UseCountLeadingZerosInstruction);
8480 match(Set dst (CountLeadingZerosI src));
8481 effect(KILL cr);
8482
8483 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8484 "jnz skip\n\t"
8485 "movl $dst, -1\n"
8486 "skip:\n\t"
8487 "negl $dst\n\t"
8488 "addl $dst, 31" %}
8489 ins_encode %{
8490 Register Rdst = $dst$$Register;
8491 Register Rsrc = $src$$Register;
8492 Label skip;
8493 __ bsrl(Rdst, Rsrc);
8494 __ jccb(Assembler::notZero, skip);
8495 __ movl(Rdst, -1);
8496 __ bind(skip);
8497 __ negl(Rdst);
8498 __ addl(Rdst, BitsPerInt - 1);
8499 %}
8500 ins_pipe(ialu_reg);
8501 %}
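// Worked example for the bsr-based sequence above: for $src == 1, bsrl
// writes index 0, so dst = -(0) + 31 = 31 leading zeros; for $src == 0 the
// branch is not taken, dst is set to -1, and dst = -(-1) + 31 = 32.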
8502
8503 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8504 predicate(UseCountLeadingZerosInstruction);
8505 match(Set dst (CountLeadingZerosL src));
8506 effect(KILL cr);
8507
8508 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8509 ins_encode %{
8510 __ lzcntq($dst$$Register, $src$$Register);
8511 %}
8512 ins_pipe(ialu_reg);
8513 %}
8514
8515 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8516 predicate(UseCountLeadingZerosInstruction);
8517 match(Set dst (CountLeadingZerosL (LoadL src)));
8518 effect(KILL cr);
8519 ins_cost(175);
8520 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8521 ins_encode %{
8522 __ lzcntq($dst$$Register, $src$$Address);
8523 %}
8524 ins_pipe(ialu_reg_mem);
8525 %}
8526
8527 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8528 predicate(!UseCountLeadingZerosInstruction);
8529 match(Set dst (CountLeadingZerosL src));
8530 effect(KILL cr);
8531
8532 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8533 "jnz skip\n\t"
8534 "movl $dst, -1\n"
8535 "skip:\n\t"
8536 "negl $dst\n\t"
8537 "addl $dst, 63" %}
8538 ins_encode %{
8539 Register Rdst = $dst$$Register;
8540 Register Rsrc = $src$$Register;
8541 Label skip;
8542 __ bsrq(Rdst, Rsrc);
8543 __ jccb(Assembler::notZero, skip);
8544 __ movl(Rdst, -1);
8545 __ bind(skip);
8546 __ negl(Rdst);
8547 __ addl(Rdst, BitsPerLong - 1);
8548 %}
8549 ins_pipe(ialu_reg);
8550 %}
8551
8552 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8553 predicate(UseCountTrailingZerosInstruction);
8554 match(Set dst (CountTrailingZerosI src));
8555 effect(KILL cr);
8556
8557 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8558 ins_encode %{
8559 __ tzcntl($dst$$Register, $src$$Register);
8560 %}
8561 ins_pipe(ialu_reg);
8562 %}
8563
8564 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8565 predicate(UseCountTrailingZerosInstruction);
8566 match(Set dst (CountTrailingZerosI (LoadI src)));
8567 effect(KILL cr);
8568 ins_cost(175);
8569 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8570 ins_encode %{
8571 __ tzcntl($dst$$Register, $src$$Address);
8572 %}
8573 ins_pipe(ialu_reg_mem);
8574 %}
8575
8576 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8577 predicate(!UseCountTrailingZerosInstruction);
8578 match(Set dst (CountTrailingZerosI src));
8579 effect(KILL cr);
8580
8581 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8582 "jnz done\n\t"
8583 "movl $dst, 32\n"
8584 "done:" %}
8585 ins_encode %{
8586 Register Rdst = $dst$$Register;
8587 Label done;
8588 __ bsfl(Rdst, $src$$Register);
8589 __ jccb(Assembler::notZero, done);
8590 __ movl(Rdst, BitsPerInt);
8591 __ bind(done);
8592 %}
8593 ins_pipe(ialu_reg);
8594 %}
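// Worked example for the bsf-based sequence above: for $src == 0, bsfl sets
// ZF, the branch falls through, and dst is loaded with BitsPerInt (32),
// matching Integer.numberOfTrailingZeros(0).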
8595
8596 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8597 predicate(UseCountTrailingZerosInstruction);
8598 match(Set dst (CountTrailingZerosL src));
8599 effect(KILL cr);
8600
8601 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8602 ins_encode %{
8603 __ tzcntq($dst$$Register, $src$$Register);
8604 %}
8605 ins_pipe(ialu_reg);
8606 %}
8607
8608 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8609 predicate(UseCountTrailingZerosInstruction);
8610 match(Set dst (CountTrailingZerosL (LoadL src)));
8611 effect(KILL cr);
8612 ins_cost(175);
8613 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8614 ins_encode %{
8615 __ tzcntq($dst$$Register, $src$$Address);
8616 %}
8617 ins_pipe(ialu_reg_mem);
8618 %}
8619
8620 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8621 predicate(!UseCountTrailingZerosInstruction);
8622 match(Set dst (CountTrailingZerosL src));
8623 effect(KILL cr);
8624
8625 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8626 "jnz done\n\t"
8627 "movl $dst, 64\n"
8628 "done:" %}
8629 ins_encode %{
8630 Register Rdst = $dst$$Register;
8631 Label done;
8632 __ bsfq(Rdst, $src$$Register);
8633 __ jccb(Assembler::notZero, done);
8634 __ movl(Rdst, BitsPerLong);
8635 __ bind(done);
8636 %}
8637 ins_pipe(ialu_reg);
8638 %}
8639
8640 //--------------- Reverse Operation Instructions ----------------
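// Without GFNI, MacroAssembler::reverseI/reverseL do the bit reversal with
// scalar shift-and-mask swaps using the GPR temps. Broadly, the _gfni
// variants instead reverse the bits of each byte in a single affine
// transform (vgf2p8affineqb with a bit-reversal matrix) and then fix up the
// byte order, which is why they ask for XMM temporaries.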
8641 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8642 predicate(!VM_Version::supports_gfni());
8643 match(Set dst (ReverseI src));
8644 effect(TEMP dst, TEMP rtmp, KILL cr);
8645 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8646 ins_encode %{
8647 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8648 %}
8649 ins_pipe( ialu_reg );
8650 %}
8651
8652 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8653 predicate(VM_Version::supports_gfni());
8654 match(Set dst (ReverseI src));
8655 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8656 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8657 ins_encode %{
8658 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8659 %}
8660 ins_pipe( ialu_reg );
8661 %}
8662
8663 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8664 predicate(!VM_Version::supports_gfni());
8665 match(Set dst (ReverseL src));
8666 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8667 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8668 ins_encode %{
8669 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8670 %}
8671 ins_pipe( ialu_reg );
8672 %}
8673
8674 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8675 predicate(VM_Version::supports_gfni());
8676 match(Set dst (ReverseL src));
8677 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8678 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8679 ins_encode %{
8680 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8681 %}
8682 ins_pipe( ialu_reg );
8683 %}
8684
8685 //---------- Population Count Instructions -------------------------------------
8686
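// POPCNT leaves the flags in a defined but useless state (ZF is set iff the
// source is zero, the other arithmetic flags are cleared), which is why
// every rule below KILLs cr.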
8687 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8688 predicate(UsePopCountInstruction);
8689 match(Set dst (PopCountI src));
8690 effect(KILL cr);
8691
8692 format %{ "popcnt $dst, $src" %}
8693 ins_encode %{
8694 __ popcntl($dst$$Register, $src$$Register);
8695 %}
8696 ins_pipe(ialu_reg);
8697 %}
8698
8699 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8700 predicate(UsePopCountInstruction);
8701 match(Set dst (PopCountI (LoadI mem)));
8702 effect(KILL cr);
8703
8704 format %{ "popcnt $dst, $mem" %}
8705 ins_encode %{
8706 __ popcntl($dst$$Register, $mem$$Address);
8707 %}
8708 ins_pipe(ialu_reg);
8709 %}
8710
8711 // Note: Long.bitCount(long) returns an int.
8712 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8713 predicate(UsePopCountInstruction);
8714 match(Set dst (PopCountL src));
8715 effect(KILL cr);
8716
8717 format %{ "popcnt $dst, $src" %}
8718 ins_encode %{
8719 __ popcntq($dst$$Register, $src$$Register);
8720 %}
8721 ins_pipe(ialu_reg);
8722 %}
8723
8724 // Note: Long.bitCount(long) returns an int.
8725 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8726 predicate(UsePopCountInstruction);
8727 match(Set dst (PopCountL (LoadL mem)));
8728 effect(KILL cr);
8729
8730 format %{ "popcnt $dst, $mem" %}
8731 ins_encode %{
8732 __ popcntq($dst$$Register, $mem$$Address);
8733 %}
8734 ins_pipe(ialu_reg);
8735 %}
8736
8737
8738 //----------MemBar Instructions-----------------------------------------------
8739 // Memory barrier flavors
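// x86-64 is TSO: ordinary loads already have acquire semantics and ordinary
// stores already have release semantics, so the acquire/release/storestore
// barriers below only constrain the compiler and emit no code. The one
// reordering the hardware does perform is StoreLoad, so membar_volatile
// emits a locked read-modify-write on the stack ("lock addl [rsp], 0"),
// which orders like mfence but is typically cheaper.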
8740
8741 instruct membar_acquire()
8742 %{
8743 match(MemBarAcquire);
8744 match(LoadFence);
8745 ins_cost(0);
8746
8747 size(0);
8748 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8749 ins_encode();
8750 ins_pipe(empty);
8751 %}
8752
8753 instruct membar_acquire_lock()
8754 %{
8755 match(MemBarAcquireLock);
8756 ins_cost(0);
8757
8758 size(0);
8759 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8760 ins_encode();
8761 ins_pipe(empty);
8762 %}
8763
8764 instruct membar_release()
8765 %{
8766 match(MemBarRelease);
8767 match(StoreFence);
8768 ins_cost(0);
8769
8770 size(0);
8771 format %{ "MEMBAR-release ! (empty encoding)" %}
8772 ins_encode();
8773 ins_pipe(empty);
8774 %}
8775
8776 instruct membar_release_lock()
8777 %{
8778 match(MemBarReleaseLock);
8779 ins_cost(0);
8780
8781 size(0);
8782 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8783 ins_encode();
8784 ins_pipe(empty);
8785 %}
8786
8787 instruct membar_volatile(rFlagsReg cr) %{
8788 match(MemBarVolatile);
8789 effect(KILL cr);
8790 ins_cost(400);
8791
8792 format %{
8793 $$template
8794 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8795 %}
8796 ins_encode %{
8797 __ membar(Assembler::StoreLoad);
8798 %}
8799 ins_pipe(pipe_slow);
8800 %}
8801
8802 instruct unnecessary_membar_volatile()
8803 %{
8804 match(MemBarVolatile);
8805 predicate(Matcher::post_store_load_barrier(n));
8806 ins_cost(0);
8807
8808 size(0);
8809 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8810 ins_encode();
8811 ins_pipe(empty);
8812 %}
8813
8814 instruct membar_storestore() %{
8815 match(MemBarStoreStore);
8816 match(StoreStoreFence);
8817 ins_cost(0);
8818
8819 size(0);
8820 format %{ "MEMBAR-storestore (empty encoding)" %}
8821 ins_encode( );
8822 ins_pipe(empty);
8823 %}
8824
8825 //----------Move Instructions--------------------------------------------------
8826
8827 instruct castX2P(rRegP dst, rRegL src)
8828 %{
8829 match(Set dst (CastX2P src));
8830
8831 format %{ "movq $dst, $src\t# long->ptr" %}
8832 ins_encode %{
8833 if ($dst$$reg != $src$$reg) {
8834 __ movptr($dst$$Register, $src$$Register);
8835 }
8836 %}
8837 ins_pipe(ialu_reg_reg); // XXX
8838 %}
8839
8840 instruct castP2X(rRegL dst, rRegP src)
8841 %{
8842 match(Set dst (CastP2X src));
8843
8844 format %{ "movq $dst, $src\t# ptr -> long" %}
8845 ins_encode %{
8846 if ($dst$$reg != $src$$reg) {
8847 __ movptr($dst$$Register, $src$$Register);
8848 }
8849 %}
8850 ins_pipe(ialu_reg_reg); // XXX
8851 %}
8852
// Convert an oop into an int for vector alignment masking
8854 instruct convP2I(rRegI dst, rRegP src)
8855 %{
8856 match(Set dst (ConvL2I (CastP2X src)));
8857
8858 format %{ "movl $dst, $src\t# ptr -> int" %}
8859 ins_encode %{
8860 __ movl($dst$$Register, $src$$Register);
8861 %}
8862 ins_pipe(ialu_reg_reg); // XXX
8863 %}
8864
// Convert a compressed oop into an int for vector alignment masking
// when compressed oops are unshifted 32-bit values (heap < 4GB).
8867 instruct convN2I(rRegI dst, rRegN src)
8868 %{
8869 predicate(CompressedOops::shift() == 0);
8870 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8871
8872 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8873 ins_encode %{
8874 __ movl($dst$$Register, $src$$Register);
8875 %}
8876 ins_pipe(ialu_reg_reg); // XXX
8877 %}
8878
8879 // Convert oop pointer into compressed form
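// With the usual CompressedOops parameters the encoding is
//   narrow = (oop - heap_base) >> shift     (null maps to 0)
// and decoding is the inverse. The _not_null variants below can skip the
// null check and so run branch-free.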
8880 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8881 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8882 match(Set dst (EncodeP src));
8883 effect(KILL cr);
8884 format %{ "encode_heap_oop $dst,$src" %}
8885 ins_encode %{
8886 Register s = $src$$Register;
8887 Register d = $dst$$Register;
8888 if (s != d) {
8889 __ movq(d, s);
8890 }
8891 __ encode_heap_oop(d);
8892 %}
8893 ins_pipe(ialu_reg_long);
8894 %}
8895
8896 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8897 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8898 match(Set dst (EncodeP src));
8899 effect(KILL cr);
8900 format %{ "encode_heap_oop_not_null $dst,$src" %}
8901 ins_encode %{
8902 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8903 %}
8904 ins_pipe(ialu_reg_long);
8905 %}
8906
8907 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8908 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8909 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8910 match(Set dst (DecodeN src));
8911 effect(KILL cr);
8912 format %{ "decode_heap_oop $dst,$src" %}
8913 ins_encode %{
8914 Register s = $src$$Register;
8915 Register d = $dst$$Register;
8916 if (s != d) {
8917 __ movq(d, s);
8918 }
8919 __ decode_heap_oop(d);
8920 %}
8921 ins_pipe(ialu_reg_long);
8922 %}
8923
8924 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8925 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8926 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8927 match(Set dst (DecodeN src));
8928 effect(KILL cr);
8929 format %{ "decode_heap_oop_not_null $dst,$src" %}
8930 ins_encode %{
8931 Register s = $src$$Register;
8932 Register d = $dst$$Register;
8933 if (s != d) {
8934 __ decode_heap_oop_not_null(d, s);
8935 } else {
8936 __ decode_heap_oop_not_null(d);
8937 }
8938 %}
8939 ins_pipe(ialu_reg_long);
8940 %}
8941
8942 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8943 match(Set dst (EncodePKlass src));
8944 effect(TEMP dst, KILL cr);
8945 format %{ "encode_and_move_klass_not_null $dst,$src" %}
8946 ins_encode %{
8947 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
8948 %}
8949 ins_pipe(ialu_reg_long);
8950 %}
8951
8952 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8953 match(Set dst (DecodeNKlass src));
8954 effect(TEMP dst, KILL cr);
8955 format %{ "decode_and_move_klass_not_null $dst,$src" %}
8956 ins_encode %{
8957 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
8958 %}
8959 ins_pipe(ialu_reg_long);
8960 %}
8961
8962 //----------Conditional Move---------------------------------------------------
// Jump (jump-table dispatch)
// jumpXtnd_offset is a dummy rule (predicate(false)) whose purpose is to
// generate the temp register used by the jump-table rules.
8965 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
8966 match(Jump (LShiftL switch_val shift));
8967 ins_cost(350);
8968 predicate(false);
8969 effect(TEMP dest);
8970
8971 format %{ "leaq $dest, [$constantaddress]\n\t"
8972 "jmp [$dest + $switch_val << $shift]\n\t" %}
8973 ins_encode %{
8974 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
8975 // to do that and the compiler is using that register as one it can allocate.
8976 // So we build it all by hand.
8977 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
8978 // ArrayAddress dispatch(table, index);
8979 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
8980 __ lea($dest$$Register, $constantaddress);
8981 __ jmp(dispatch);
8982 %}
8983 ins_pipe(pipe_jmp);
8984 %}
8985
8986 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
8987 match(Jump (AddL (LShiftL switch_val shift) offset));
8988 ins_cost(350);
8989 effect(TEMP dest);
8990
8991 format %{ "leaq $dest, [$constantaddress]\n\t"
8992 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
8993 ins_encode %{
8994 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
8995 // to do that and the compiler is using that register as one it can allocate.
8996 // So we build it all by hand.
8997 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
8998 // ArrayAddress dispatch(table, index);
8999 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9000 __ lea($dest$$Register, $constantaddress);
9001 __ jmp(dispatch);
9002 %}
9003 ins_pipe(pipe_jmp);
9004 %}
9005
9006 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9007 match(Jump switch_val);
9008 ins_cost(350);
9009 effect(TEMP dest);
9010
9011 format %{ "leaq $dest, [$constantaddress]\n\t"
9012 "jmp [$dest + $switch_val]\n\t" %}
9013 ins_encode %{
9014 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9015 // to do that and the compiler is using that register as one it can allocate.
9016 // So we build it all by hand.
9017 // Address index(noreg, switch_reg, Address::times_1);
9018 // ArrayAddress dispatch(table, index);
9019 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9020 __ lea($dest$$Register, $constantaddress);
9021 __ jmp(dispatch);
9022 %}
9023 ins_pipe(pipe_jmp);
9024 %}
9025
9026 // Conditional move
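// When the CMove selects between the constants 1 and 0, the cmovI_imm_01*
// rules below materialize the result with a single SETcc on the negated
// condition (hence the "setbn$cop" format) instead of loading a constant
// and conditionally moving over it.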
9027 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9028 %{
9029 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9030 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9031
9032 ins_cost(100); // XXX
9033 format %{ "setbn$cop $dst\t# signed, int" %}
9034 ins_encode %{
9035 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9036 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9037 %}
9038 ins_pipe(ialu_reg);
9039 %}
9040
9041 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9042 %{
9043 predicate(!UseAPX);
9044 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9045
9046 ins_cost(200); // XXX
9047 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9048 ins_encode %{
9049 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9050 %}
9051 ins_pipe(pipe_cmov_reg);
9052 %}
9053
9054 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9055 %{
9056 predicate(UseAPX);
9057 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9058
9059 ins_cost(200);
9060 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9061 ins_encode %{
9062 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9063 %}
9064 ins_pipe(pipe_cmov_reg);
9065 %}
9066
9067 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9068 %{
9069 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9070 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9071
9072 ins_cost(100); // XXX
9073 format %{ "setbn$cop $dst\t# unsigned, int" %}
9074 ins_encode %{
9075 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9076 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9077 %}
9078 ins_pipe(ialu_reg);
9079 %}
9080
9081 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9082 predicate(!UseAPX);
9083 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9084
9085 ins_cost(200); // XXX
9086 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9087 ins_encode %{
9088 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9089 %}
9090 ins_pipe(pipe_cmov_reg);
9091 %}
9092
9093 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9094 predicate(UseAPX);
9095 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9096
9097 ins_cost(200);
9098 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9099 ins_encode %{
9100 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9101 %}
9102 ins_pipe(pipe_cmov_reg);
9103 %}
9104
9105 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9106 %{
9107 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9108 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9109
9110 ins_cost(100); // XXX
9111 format %{ "setbn$cop $dst\t# unsigned, int" %}
9112 ins_encode %{
9113 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9114 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9115 %}
9116 ins_pipe(ialu_reg);
9117 %}
9118
9119 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9120 predicate(!UseAPX);
9121 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9122 ins_cost(200);
9123 expand %{
9124 cmovI_regU(cop, cr, dst, src);
9125 %}
9126 %}
9127
9128 instruct cmovI_regUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, rRegI src2) %{
9129 predicate(UseAPX);
9130 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9131 ins_cost(200);
9132 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9133 ins_encode %{
9134 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9135 %}
9136 ins_pipe(pipe_cmov_reg);
9137 %}
9138
9139 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9140 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9141 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9142
9143 ins_cost(200); // XXX
9144 format %{ "cmovpl $dst, $src\n\t"
9145 "cmovnel $dst, $src" %}
9146 ins_encode %{
9147 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9148 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9149 %}
9150 ins_pipe(pipe_cmov_reg);
9151 %}
9152
9153 instruct cmovI_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9154 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9155 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9156 effect(TEMP dst);
9157
9158 ins_cost(200);
9159 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9160 "cmovnel $dst, $src2" %}
9161 ins_encode %{
9162 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9163 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9164 %}
9165 ins_pipe(pipe_cmov_reg);
9166 %}
9167
9168 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9169 // inputs of the CMove
9170 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9171 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9172 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9173 effect(TEMP dst);
9174
9175 ins_cost(200); // XXX
9176 format %{ "cmovpl $dst, $src\n\t"
9177 "cmovnel $dst, $src" %}
9178 ins_encode %{
9179 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9180 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9181 %}
9182 ins_pipe(pipe_cmov_reg);
9183 %}
9184
// This special handling is needed only for the eq/neq comparisons: NaN == NaN
// is false, and an unordered compare sets the parity flag when either operand
// is a NaN, so for ne the value must also be moved on parity, while eq is
// rewritten as ne with the CMove inputs flipped (see above).
9187 instruct cmovI_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src1, rRegI src2) %{
9188 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9189 match(Set dst (CMoveI (Binary cop cr) (Binary src2 src1)));
9190 effect(TEMP dst);
9191
9192 ins_cost(200);
9193 format %{ "ecmovpl $dst, $src1, $src2\n\t"
9194 "cmovnel $dst, $src2" %}
9195 ins_encode %{
9196 __ ecmovl(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9197 __ cmovl(Assembler::notEqual, $dst$$Register, $src2$$Register);
9198 %}
9199 ins_pipe(pipe_cmov_reg);
9200 %}
9201
9202 // Conditional move
9203 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9204 predicate(!UseAPX);
9205 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9206
9207 ins_cost(250); // XXX
9208 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9209 ins_encode %{
9210 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9211 %}
9212 ins_pipe(pipe_cmov_mem);
9213 %}
9214
9215 // Conditional move
9216 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9217 %{
9218 predicate(UseAPX);
9219 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9220
9221 ins_cost(250);
9222 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9223 ins_encode %{
9224 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9225 %}
9226 ins_pipe(pipe_cmov_mem);
9227 %}
9228
9229 // Conditional move
9230 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9231 %{
9232 predicate(!UseAPX);
9233 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9234
9235 ins_cost(250); // XXX
9236 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9237 ins_encode %{
9238 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9239 %}
9240 ins_pipe(pipe_cmov_mem);
9241 %}
9242
9243 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9244 predicate(!UseAPX);
9245 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9246 ins_cost(250);
9247 expand %{
9248 cmovI_memU(cop, cr, dst, src);
9249 %}
9250 %}
9251
9252 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9253 %{
9254 predicate(UseAPX);
9255 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9256
9257 ins_cost(250);
9258 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9259 ins_encode %{
9260 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9261 %}
9262 ins_pipe(pipe_cmov_mem);
9263 %}
9264
9265 instruct cmovI_rReg_rReg_memUCF_ndd(rRegI dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegI src1, memory src2)
9266 %{
9267 predicate(UseAPX);
9268 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9269 ins_cost(250);
9270 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9271 ins_encode %{
9272 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9273 %}
9274 ins_pipe(pipe_cmov_mem);
9275 %}
9276
9277 // Conditional move
9278 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9279 %{
9280 predicate(!UseAPX);
9281 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9282
9283 ins_cost(200); // XXX
9284 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9285 ins_encode %{
9286 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9287 %}
9288 ins_pipe(pipe_cmov_reg);
9289 %}
9290
9291 // Conditional move ndd
9292 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9293 %{
9294 predicate(UseAPX);
9295 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9296
9297 ins_cost(200);
9298 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9299 ins_encode %{
9300 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9301 %}
9302 ins_pipe(pipe_cmov_reg);
9303 %}
9304
9305 // Conditional move
9306 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9307 %{
9308 predicate(!UseAPX);
9309 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9310
9311 ins_cost(200); // XXX
9312 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9313 ins_encode %{
9314 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9315 %}
9316 ins_pipe(pipe_cmov_reg);
9317 %}
9318
9319 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9320 predicate(!UseAPX);
9321 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9322 ins_cost(200);
9323 expand %{
9324 cmovN_regU(cop, cr, dst, src);
9325 %}
9326 %}
9327
9328 // Conditional move ndd
9329 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9330 %{
9331 predicate(UseAPX);
9332 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9333
9334 ins_cost(200);
9335 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9336 ins_encode %{
9337 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9338 %}
9339 ins_pipe(pipe_cmov_reg);
9340 %}
9341
9342 instruct cmovN_regUCF_ndd(rRegN dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegN src1, rRegN src2) %{
9343 predicate(UseAPX);
9344 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9345 ins_cost(200);
9346 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9347 ins_encode %{
9348 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9349 %}
9350 ins_pipe(pipe_cmov_reg);
9351 %}
9352
9353 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9354 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9355 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9356
9357 ins_cost(200); // XXX
9358 format %{ "cmovpl $dst, $src\n\t"
9359 "cmovnel $dst, $src" %}
9360 ins_encode %{
9361 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9362 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9363 %}
9364 ins_pipe(pipe_cmov_reg);
9365 %}
9366
9367 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9368 // inputs of the CMove
9369 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9370 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9371 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9372
9373 ins_cost(200); // XXX
9374 format %{ "cmovpl $dst, $src\n\t"
9375 "cmovnel $dst, $src" %}
9376 ins_encode %{
9377 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9378 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9379 %}
9380 ins_pipe(pipe_cmov_reg);
9381 %}
9382
9383 // Conditional move
9384 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9385 %{
9386 predicate(!UseAPX);
9387 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9388
9389 ins_cost(200); // XXX
9390 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9391 ins_encode %{
9392 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9393 %}
9394 ins_pipe(pipe_cmov_reg); // XXX
9395 %}
9396
9397 // Conditional move ndd
9398 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9399 %{
9400 predicate(UseAPX);
9401 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9402
9403 ins_cost(200);
9404 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9405 ins_encode %{
9406 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9407 %}
9408 ins_pipe(pipe_cmov_reg);
9409 %}
9410
9411 // Conditional move
9412 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9413 %{
9414 predicate(!UseAPX);
9415 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9416
9417 ins_cost(200); // XXX
9418 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9419 ins_encode %{
9420 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9421 %}
9422 ins_pipe(pipe_cmov_reg); // XXX
9423 %}
9424
9425 // Conditional move ndd
9426 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9427 %{
9428 predicate(UseAPX);
9429 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9430
9431 ins_cost(200);
9432 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9433 ins_encode %{
9434 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9435 %}
9436 ins_pipe(pipe_cmov_reg);
9437 %}
9438
9439 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9440 predicate(!UseAPX);
9441 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9442 ins_cost(200);
9443 expand %{
9444 cmovP_regU(cop, cr, dst, src);
9445 %}
9446 %}
9447
9448 instruct cmovP_regUCF_ndd(rRegP dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegP src1, rRegP src2) %{
9449 predicate(UseAPX);
9450 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9451 ins_cost(200);
9452 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9453 ins_encode %{
9454 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9455 %}
9456 ins_pipe(pipe_cmov_reg);
9457 %}
9458
9459 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9460 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9461 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9462
9463 ins_cost(200); // XXX
9464 format %{ "cmovpq $dst, $src\n\t"
9465 "cmovneq $dst, $src" %}
9466 ins_encode %{
9467 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9468 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9469 %}
9470 ins_pipe(pipe_cmov_reg);
9471 %}
9472
9473 instruct cmovP_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9474 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9475 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9476 effect(TEMP dst);
9477
9478 ins_cost(200);
9479 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9480 "cmovneq $dst, $src2" %}
9481 ins_encode %{
9482 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9483 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9484 %}
9485 ins_pipe(pipe_cmov_reg);
9486 %}
9487
9488 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9489 // inputs of the CMove
9490 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9491 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9492 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9493
9494 ins_cost(200); // XXX
9495 format %{ "cmovpq $dst, $src\n\t"
9496 "cmovneq $dst, $src" %}
9497 ins_encode %{
9498 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9499 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9500 %}
9501 ins_pipe(pipe_cmov_reg);
9502 %}
9503
9504 instruct cmovP_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src1, rRegP src2) %{
9505 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9506 match(Set dst (CMoveP (Binary cop cr) (Binary src2 src1)));
9507 effect(TEMP dst);
9508
9509 ins_cost(200);
9510 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9511 "cmovneq $dst, $src2" %}
9512 ins_encode %{
9513 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9514 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9515 %}
9516 ins_pipe(pipe_cmov_reg);
9517 %}
9518
9519 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9520 %{
9521 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9522 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9523
9524 ins_cost(100); // XXX
9525 format %{ "setbn$cop $dst\t# signed, long" %}
9526 ins_encode %{
9527 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9528 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9529 %}
9530 ins_pipe(ialu_reg);
9531 %}
9532
9533 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9534 %{
9535 predicate(!UseAPX);
9536 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9537
9538 ins_cost(200); // XXX
9539 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9540 ins_encode %{
9541 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9542 %}
9543 ins_pipe(pipe_cmov_reg); // XXX
9544 %}
9545
9546 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9547 %{
9548 predicate(UseAPX);
9549 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9550
9551 ins_cost(200);
9552 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9553 ins_encode %{
9554 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9555 %}
9556 ins_pipe(pipe_cmov_reg);
9557 %}
9558
9559 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9560 %{
9561 predicate(!UseAPX);
9562 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9563
9564 ins_cost(200); // XXX
9565 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9566 ins_encode %{
9567 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9568 %}
9569 ins_pipe(pipe_cmov_mem); // XXX
9570 %}
9571
9572 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9573 %{
9574 predicate(UseAPX);
9575 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9576
9577 ins_cost(200);
9578 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9579 ins_encode %{
9580 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9581 %}
9582 ins_pipe(pipe_cmov_mem);
9583 %}
9584
9585 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9586 %{
9587 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9588 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9589
9590 ins_cost(100); // XXX
9591 format %{ "setbn$cop $dst\t# unsigned, long" %}
9592 ins_encode %{
9593 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9594 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9595 %}
9596 ins_pipe(ialu_reg);
9597 %}
9598
9599 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9600 %{
9601 predicate(!UseAPX);
9602 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9603
9604 ins_cost(200); // XXX
9605 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9606 ins_encode %{
9607 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9608 %}
9609 ins_pipe(pipe_cmov_reg); // XXX
9610 %}
9611
9612 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9613 %{
9614 predicate(UseAPX);
9615 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9616
9617 ins_cost(200);
9618 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9619 ins_encode %{
9620 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9621 %}
9622 ins_pipe(pipe_cmov_reg);
9623 %}
9624
9625 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9626 %{
9627 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9628 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9629
9630 ins_cost(100); // XXX
9631 format %{ "setbn$cop $dst\t# unsigned, long" %}
9632 ins_encode %{
9633 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9634 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9635 %}
9636 ins_pipe(ialu_reg);
9637 %}
9638
9639 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9640 predicate(!UseAPX);
9641 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9642 ins_cost(200);
9643 expand %{
9644 cmovL_regU(cop, cr, dst, src);
9645 %}
9646 %}
9647
9648 instruct cmovL_regUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, rRegL src2)
9649 %{
9650 predicate(UseAPX);
9651 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9652 ins_cost(200);
9653 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9654 ins_encode %{
9655 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9656 %}
9657 ins_pipe(pipe_cmov_reg);
9658 %}
9659
9660 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9661 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9662 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9663
9664 ins_cost(200); // XXX
9665 format %{ "cmovpq $dst, $src\n\t"
9666 "cmovneq $dst, $src" %}
9667 ins_encode %{
9668 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9669 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9670 %}
9671 ins_pipe(pipe_cmov_reg);
9672 %}
9673
9674 instruct cmovL_regUCF2_ne_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9675 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9676 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9677 effect(TEMP dst);
9678
9679 ins_cost(200);
9680 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9681 "cmovneq $dst, $src2" %}
9682 ins_encode %{
9683 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9684 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9685 %}
9686 ins_pipe(pipe_cmov_reg);
9687 %}
9688
9689 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9690 // inputs of the CMove
9691 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9692 predicate(!UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9693 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9694
9695 ins_cost(200); // XXX
9696 format %{ "cmovpq $dst, $src\n\t"
9697 "cmovneq $dst, $src" %}
9698 ins_encode %{
9699 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9700 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9701 %}
9702 ins_pipe(pipe_cmov_reg);
9703 %}
9704
9705 instruct cmovL_regUCF2_eq_ndd(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src1, rRegL src2) %{
9706 predicate(UseAPX && n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9707 match(Set dst (CMoveL (Binary cop cr) (Binary src2 src1)));
9708 effect(TEMP dst);
9709
9710 ins_cost(200);
9711 format %{ "ecmovpq $dst, $src1, $src2\n\t"
9712 "cmovneq $dst, $src2" %}
9713 ins_encode %{
9714 __ ecmovq(Assembler::parity, $dst$$Register, $src1$$Register, $src2$$Register);
9715 __ cmovq(Assembler::notEqual, $dst$$Register, $src2$$Register);
9716 %}
9717 ins_pipe(pipe_cmov_reg);
9718 %}
9719
9720 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9721 %{
9722 predicate(!UseAPX);
9723 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9724
9725 ins_cost(200); // XXX
9726 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9727 ins_encode %{
9728 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9729 %}
9730 ins_pipe(pipe_cmov_mem); // XXX
9731 %}
9732
9733 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9734 predicate(!UseAPX);
9735 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9736 ins_cost(200);
9737 expand %{
9738 cmovL_memU(cop, cr, dst, src);
9739 %}
9740 %}
9741
9742 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9743 %{
9744 predicate(UseAPX);
9745 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9746
9747 ins_cost(200);
9748 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9749 ins_encode %{
9750 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9751 %}
9752 ins_pipe(pipe_cmov_mem);
9753 %}
9754
9755 instruct cmovL_rReg_rReg_memUCF_ndd(rRegL dst, cmpOpUCF cop, rFlagsRegUCF cr, rRegL src1, memory src2)
9756 %{
9757 predicate(UseAPX);
9758 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9759 ins_cost(200);
9760 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9761 ins_encode %{
9762 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9763 %}
9764 ins_pipe(pipe_cmov_mem);
9765 %}
9766
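// There is no conditional move between XMM registers, so the float/double
// CMoves below branch around a register-to-register move on the inverted
// condition.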
9767 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9768 %{
9769 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9770
9771 ins_cost(200); // XXX
9772 format %{ "jn$cop skip\t# signed cmove float\n\t"
9773 "movss $dst, $src\n"
9774 "skip:" %}
9775 ins_encode %{
9776 Label Lskip;
9777 // Invert sense of branch from sense of CMOV
9778 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9779 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9780 __ bind(Lskip);
9781 %}
9782 ins_pipe(pipe_slow);
9783 %}
9784
9785 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9786 %{
9787 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9788
9789 ins_cost(200); // XXX
9790 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9791 "movss $dst, $src\n"
9792 "skip:" %}
9793 ins_encode %{
9794 Label Lskip;
9795 // Invert sense of branch from sense of CMOV
9796 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9797 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9798 __ bind(Lskip);
9799 %}
9800 ins_pipe(pipe_slow);
9801 %}
9802
9803 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9804 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9805 ins_cost(200);
9806 expand %{
9807 cmovF_regU(cop, cr, dst, src);
9808 %}
9809 %}
9810
9811 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9812 %{
9813 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9814
9815 ins_cost(200); // XXX
9816 format %{ "jn$cop skip\t# signed cmove double\n\t"
9817 "movsd $dst, $src\n"
9818 "skip:" %}
9819 ins_encode %{
9820 Label Lskip;
9821 // Invert sense of branch from sense of CMOV
9822 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9823 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9824 __ bind(Lskip);
9825 %}
9826 ins_pipe(pipe_slow);
9827 %}
9828
9829 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9830 %{
9831 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9832
9833 ins_cost(200); // XXX
9834 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9835 "movsd $dst, $src\n"
9836 "skip:" %}
9837 ins_encode %{
9838 Label Lskip;
9839 // Invert sense of branch from sense of CMOV
9840 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9841 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9842 __ bind(Lskip);
9843 %}
9844 ins_pipe(pipe_slow);
9845 %}
9846
9847 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9848 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9849 ins_cost(200);
9850 expand %{
9851 cmovD_regU(cop, cr, dst, src);
9852 %}
9853 %}
9854
9855 //----------Arithmetic Instructions--------------------------------------------
9856 //----------Addition Instructions----------------------------------------------
9857
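// The e-prefixed forms (eaddl, eincl, ...) are APX NDD (new data
// destination) encodings: three-operand, non-destructive variants of the
// classic two-operand ALU instructions, selected when UseAPX is set. The
// trailing boolean in the MacroAssembler calls selects the APX no-flags
// (NF) form; it is false here because these rules advertise that they set
// the arithmetic flags.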
9858 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9859 %{
9860 predicate(!UseAPX);
9861 match(Set dst (AddI dst src));
9862 effect(KILL cr);
9863 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9864 format %{ "addl $dst, $src\t# int" %}
9865 ins_encode %{
9866 __ addl($dst$$Register, $src$$Register);
9867 %}
9868 ins_pipe(ialu_reg_reg);
9869 %}
9870
9871 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9872 %{
9873 predicate(UseAPX);
9874 match(Set dst (AddI src1 src2));
9875 effect(KILL cr);
9876 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9877
9878 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9879 ins_encode %{
9880 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9881 %}
9882 ins_pipe(ialu_reg_reg);
9883 %}
9884
9885 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9886 %{
9887 predicate(!UseAPX);
9888 match(Set dst (AddI dst src));
9889 effect(KILL cr);
9890 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9891
9892 format %{ "addl $dst, $src\t# int" %}
9893 ins_encode %{
9894 __ addl($dst$$Register, $src$$constant);
9895 %}
9896 ins_pipe( ialu_reg );
9897 %}
9898
9899 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9900 %{
9901 predicate(UseAPX);
9902 match(Set dst (AddI src1 src2));
9903 effect(KILL cr);
9904 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9905
9906 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9907 ins_encode %{
9908 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9909 %}
9910 ins_pipe( ialu_reg );
9911 %}
9912
9913 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9914 %{
9915 predicate(UseAPX);
9916 match(Set dst (AddI (LoadI src1) src2));
9917 effect(KILL cr);
9918 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9919
9920 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9921 ins_encode %{
9922 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9923 %}
9924 ins_pipe( ialu_reg );
9925 %}
9926
9927 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9928 %{
9929 predicate(!UseAPX);
9930 match(Set dst (AddI dst (LoadI src)));
9931 effect(KILL cr);
9932 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9933
9934 ins_cost(150); // XXX
9935 format %{ "addl $dst, $src\t# int" %}
9936 ins_encode %{
9937 __ addl($dst$$Register, $src$$Address);
9938 %}
9939 ins_pipe(ialu_reg_mem);
9940 %}
9941
9942 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9943 %{
9944 predicate(UseAPX);
9945 match(Set dst (AddI src1 (LoadI src2)));
9946 effect(KILL cr);
9947 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9948
9949 ins_cost(150);
9950 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9951 ins_encode %{
9952 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9953 %}
9954 ins_pipe(ialu_reg_mem);
9955 %}
9956
9957 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
9958 %{
9959 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9960 effect(KILL cr);
9961 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9962
9963 ins_cost(150); // XXX
9964 format %{ "addl $dst, $src\t# int" %}
9965 ins_encode %{
9966 __ addl($dst$$Address, $src$$Register);
9967 %}
9968 ins_pipe(ialu_mem_reg);
9969 %}
9970
9971 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
9972 %{
9973 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
9974 effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

9978 ins_cost(125); // XXX
9979 format %{ "addl $dst, $src\t# int" %}
9980 ins_encode %{
9981 __ addl($dst$$Address, $src$$constant);
9982 %}
9983 ins_pipe(ialu_mem_imm);
9984 %}
9985
9986 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
9987 %{
9988 predicate(!UseAPX && UseIncDec);
9989 match(Set dst (AddI dst src));
9990 effect(KILL cr);
9991
9992 format %{ "incl $dst\t# int" %}
9993 ins_encode %{
9994 __ incrementl($dst$$Register);
9995 %}
9996 ins_pipe(ialu_reg);
9997 %}
9998
9999 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10000 %{
10001 predicate(UseAPX && UseIncDec);
10002 match(Set dst (AddI src val));
10003 effect(KILL cr);
10004 flag(PD::Flag_ndd_demotable_opr1);
10005
10006 format %{ "eincl $dst, $src\t# int ndd" %}
10007 ins_encode %{
10008 __ eincl($dst$$Register, $src$$Register, false);
10009 %}
10010 ins_pipe(ialu_reg);
10011 %}
10012
10013 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10014 %{
10015 predicate(UseAPX && UseIncDec);
10016 match(Set dst (AddI (LoadI src) val));
10017 effect(KILL cr);
10018
10019 format %{ "eincl $dst, $src\t# int ndd" %}
10020 ins_encode %{
10021 __ eincl($dst$$Register, $src$$Address, false);
10022 %}
10023 ins_pipe(ialu_reg);
10024 %}
10025
10026 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10027 %{
10028 predicate(UseIncDec);
10029 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10030 effect(KILL cr);
10031
10032 ins_cost(125); // XXX
10033 format %{ "incl $dst\t# int" %}
10034 ins_encode %{
10035 __ incrementl($dst$$Address);
10036 %}
10037 ins_pipe(ialu_mem_imm);
10038 %}
10039
10040 // XXX why does that use AddI
10041 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10042 %{
10043 predicate(!UseAPX && UseIncDec);
10044 match(Set dst (AddI dst src));
10045 effect(KILL cr);
10046
10047 format %{ "decl $dst\t# int" %}
10048 ins_encode %{
10049 __ decrementl($dst$$Register);
10050 %}
10051 ins_pipe(ialu_reg);
10052 %}
10053
10054 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10055 %{
10056 predicate(UseAPX && UseIncDec);
10057 match(Set dst (AddI src val));
10058 effect(KILL cr);
10059 flag(PD::Flag_ndd_demotable_opr1);
10060
10061 format %{ "edecl $dst, $src\t# int ndd" %}
10062 ins_encode %{
10063 __ edecl($dst$$Register, $src$$Register, false);
10064 %}
10065 ins_pipe(ialu_reg);
10066 %}
10067
10068 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10069 %{
10070 predicate(UseAPX && UseIncDec);
10071 match(Set dst (AddI (LoadI src) val));
10072 effect(KILL cr);
10073
10074 format %{ "edecl $dst, $src\t# int ndd" %}
10075 ins_encode %{
10076 __ edecl($dst$$Register, $src$$Address, false);
10077 %}
10078 ins_pipe(ialu_reg);
10079 %}
10080
10081 // XXX why does that use AddI
10082 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10083 %{
10084 predicate(UseIncDec);
10085 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10086 effect(KILL cr);
10087
10088 ins_cost(125); // XXX
10089 format %{ "decl $dst\t# int" %}
10090 ins_encode %{
10091 __ decrementl($dst$$Address);
10092 %}
10093 ins_pipe(ialu_mem_imm);
10094 %}
10095
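// On cores where two- and three-operand LEA is fast (see
// VM_Version::supports_fast_2op_lea/supports_fast_3op_lea) the rules below
// fold shift-and-add chains such as
//   dst = base + (index << scale) + disp
// into a single leal. The no_rbp_r13_RegI base operand avoids RBP and R13,
// whose base encodings would force an extra displacement byte.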
10096 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10097 %{
10098 predicate(VM_Version::supports_fast_2op_lea());
10099 match(Set dst (AddI (LShiftI index scale) disp));
10100
10101 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10102 ins_encode %{
10103 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10104 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10105 %}
10106 ins_pipe(ialu_reg_reg);
10107 %}
10108
10109 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10110 %{
10111 predicate(VM_Version::supports_fast_3op_lea());
10112 match(Set dst (AddI (AddI base index) disp));
10113
10114 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10115 ins_encode %{
10116 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10117 %}
10118 ins_pipe(ialu_reg_reg);
10119 %}
10120
10121 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10122 %{
10123 predicate(VM_Version::supports_fast_2op_lea());
10124 match(Set dst (AddI base (LShiftI index scale)));
10125
10126 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10127 ins_encode %{
10128 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10129 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10130 %}
10131 ins_pipe(ialu_reg_reg);
10132 %}
10133
10134 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10135 %{
10136 predicate(VM_Version::supports_fast_3op_lea());
10137 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10138
10139 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10140 ins_encode %{
10141 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10142 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10143 %}
10144 ins_pipe(ialu_reg_reg);
10145 %}
10146
10147 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10148 %{
10149 predicate(!UseAPX);
10150 match(Set dst (AddL dst src));
10151 effect(KILL cr);
10152 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10153
10154 format %{ "addq $dst, $src\t# long" %}
10155 ins_encode %{
10156 __ addq($dst$$Register, $src$$Register);
10157 %}
10158 ins_pipe(ialu_reg_reg);
10159 %}
10160
10161 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10162 %{
10163 predicate(UseAPX);
10164 match(Set dst (AddL src1 src2));
10165 effect(KILL cr);
10166 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10167
10168 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10169 ins_encode %{
10170 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10171 %}
10172 ins_pipe(ialu_reg_reg);
10173 %}
10174
10175 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10176 %{
10177 predicate(!UseAPX);
10178 match(Set dst (AddL dst src));
10179 effect(KILL cr);
10180 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10181
10182 format %{ "addq $dst, $src\t# long" %}
10183 ins_encode %{
10184 __ addq($dst$$Register, $src$$constant);
10185 %}
10186 ins_pipe( ialu_reg );
10187 %}
10188
10189 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10190 %{
10191 predicate(UseAPX);
10192 match(Set dst (AddL src1 src2));
10193 effect(KILL cr);
10194 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10195
10196 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10197 ins_encode %{
10198 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10199 %}
10200 ins_pipe( ialu_reg );
10201 %}
10202
10203 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10204 %{
10205 predicate(UseAPX);
10206 match(Set dst (AddL (LoadL src1) src2));
10207 effect(KILL cr);
10208 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10209
10210 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10211 ins_encode %{
10212 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10213 %}
10214 ins_pipe( ialu_reg );
10215 %}
10216
10217 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10218 %{
10219 predicate(!UseAPX);
10220 match(Set dst (AddL dst (LoadL src)));
10221 effect(KILL cr);
10222 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10223
10224 ins_cost(150); // XXX
10225 format %{ "addq $dst, $src\t# long" %}
10226 ins_encode %{
10227 __ addq($dst$$Register, $src$$Address);
10228 %}
10229 ins_pipe(ialu_reg_mem);
10230 %}
10231
10232 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10233 %{
10234 predicate(UseAPX);
10235 match(Set dst (AddL src1 (LoadL src2)));
10236 effect(KILL cr);
10237 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10238
10239 ins_cost(150);
10240 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10241 ins_encode %{
10242 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10243 %}
10244 ins_pipe(ialu_reg_mem);
10245 %}
10246
10247 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10248 %{
10249 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10250 effect(KILL cr);
10251 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10252
10253 ins_cost(150); // XXX
10254 format %{ "addq $dst, $src\t# long" %}
10255 ins_encode %{
10256 __ addq($dst$$Address, $src$$Register);
10257 %}
10258 ins_pipe(ialu_mem_reg);
10259 %}
10260
10261 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10262 %{
10263 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10264 effect(KILL cr);
10265 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10266
10267 ins_cost(125); // XXX
10268 format %{ "addq $dst, $src\t# long" %}
10269 ins_encode %{
10270 __ addq($dst$$Address, $src$$constant);
10271 %}
10272 ins_pipe(ialu_mem_imm);
10273 %}
10274
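// There is no separate ideal node for long increment/decrement: they are
// matched as AddL with an immL1/immL_M1 operand (hence the XXX notes below).
// Note also that x86 inc/dec leave CF untouched (only OF/SF/ZF/AF/PF are
// updated), which is presumably why these patterns advertise no
// Flag_sets_* hints.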
10275 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10276 %{
10277 predicate(!UseAPX && UseIncDec);
10278 match(Set dst (AddL dst src));
10279 effect(KILL cr);
10280
10281 format %{ "incq $dst\t# long" %}
10282 ins_encode %{
10283 __ incrementq($dst$$Register);
10284 %}
10285 ins_pipe(ialu_reg);
10286 %}
10287
10288 instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10289 %{
10290 predicate(UseAPX && UseIncDec);
10291 match(Set dst (AddL src val));
10292 effect(KILL cr);
10293 flag(PD::Flag_ndd_demotable_opr1);
10294
10295 format %{ "eincq $dst, $src\t# long ndd" %}
10296 ins_encode %{
10297 __ eincq($dst$$Register, $src$$Register, false);
10298 %}
10299 ins_pipe(ialu_reg);
10300 %}
10301
10302 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10303 %{
10304 predicate(UseAPX && UseIncDec);
10305 match(Set dst (AddL (LoadL src) val));
10306 effect(KILL cr);
10307
10308 format %{ "eincq $dst, $src\t# long ndd" %}
10309 ins_encode %{
10310 __ eincq($dst$$Register, $src$$Address, false);
10311 %}
10312 ins_pipe(ialu_reg);
10313 %}
10314
10315 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10316 %{
10317 predicate(UseIncDec);
10318 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10319 effect(KILL cr);
10320
10321 ins_cost(125); // XXX
10322 format %{ "incq $dst\t# long" %}
10323 ins_encode %{
10324 __ incrementq($dst$$Address);
10325 %}
10326 ins_pipe(ialu_mem_imm);
10327 %}
10328
10329 // XXX why does that use AddL
10330 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10331 %{
10332 predicate(!UseAPX && UseIncDec);
10333 match(Set dst (AddL dst src));
10334 effect(KILL cr);
10335
10336 format %{ "decq $dst\t# long" %}
10337 ins_encode %{
10338 __ decrementq($dst$$Register);
10339 %}
10340 ins_pipe(ialu_reg);
10341 %}
10342
10343 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10344 %{
10345 predicate(UseAPX && UseIncDec);
10346 match(Set dst (AddL src val));
10347 effect(KILL cr);
10348 flag(PD::Flag_ndd_demotable_opr1);
10349
10350 format %{ "edecq $dst, $src\t# long ndd" %}
10351 ins_encode %{
10352 __ edecq($dst$$Register, $src$$Register, false);
10353 %}
10354 ins_pipe(ialu_reg);
10355 %}
10356
10357 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10358 %{
10359 predicate(UseAPX && UseIncDec);
10360 match(Set dst (AddL (LoadL src) val));
10361 effect(KILL cr);
10362
10363 format %{ "edecq $dst, $src\t# long ndd" %}
10364 ins_encode %{
10365 __ edecq($dst$$Register, $src$$Address, false);
10366 %}
10367 ins_pipe(ialu_reg);
10368 %}
10369
10370 // XXX why does that use AddL
10371 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10372 %{
10373 predicate(UseIncDec);
10374 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10375 effect(KILL cr);
10376
10377 ins_cost(125); // XXX
10378 format %{ "decq $dst\t# long" %}
10379 ins_encode %{
10380 __ decrementq($dst$$Address);
10381 %}
10382 ins_pipe(ialu_mem_imm);
10383 %}
10384
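// The leaL patterns below fold an add (optionally with a shifted index) into
// a single flag-preserving LEA, e.g. (illustrative):
//
//   leaq rax, [rbx + rcx*8 + 16]   # rax = rbx + (rcx << 3) + 16
//
// They are gated on supports_fast_2op_lea()/supports_fast_3op_lea() because
// multi-component LEA is slow on some microarchitectures. The
// no_rbp_r13_RegL base in the base+scaled-index form exists because RBP/R13
// cannot be encoded as a base register without an explicit displacement byte.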
10385 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10386 %{
10387 predicate(VM_Version::supports_fast_2op_lea());
10388 match(Set dst (AddL (LShiftL index scale) disp));
10389
10390 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10391 ins_encode %{
10392 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10393 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10394 %}
10395 ins_pipe(ialu_reg_reg);
10396 %}
10397
10398 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10399 %{
10400 predicate(VM_Version::supports_fast_3op_lea());
10401 match(Set dst (AddL (AddL base index) disp));
10402
10403 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10404 ins_encode %{
10405 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10406 %}
10407 ins_pipe(ialu_reg_reg);
10408 %}
10409
10410 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10411 %{
10412 predicate(VM_Version::supports_fast_2op_lea());
10413 match(Set dst (AddL base (LShiftL index scale)));
10414
10415 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10416 ins_encode %{
10417 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10418 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10419 %}
10420 ins_pipe(ialu_reg_reg);
10421 %}
10422
10423 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10424 %{
10425 predicate(VM_Version::supports_fast_3op_lea());
10426 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10427
10428 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10429 ins_encode %{
10430 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10431 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10432 %}
10433 ins_pipe(ialu_reg_reg);
10434 %}
10435
10436 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10437 %{
10438 match(Set dst (AddP dst src));
10439 effect(KILL cr);
10440 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10441
10442 format %{ "addq $dst, $src\t# ptr" %}
10443 ins_encode %{
10444 __ addq($dst$$Register, $src$$Register);
10445 %}
10446 ins_pipe(ialu_reg_reg);
10447 %}
10448
10449 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10450 %{
10451 match(Set dst (AddP dst src));
10452 effect(KILL cr);
10453 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10454
10455 format %{ "addq $dst, $src\t# ptr" %}
10456 ins_encode %{
10457 __ addq($dst$$Register, $src$$constant);
10458 %}
10459 ins_pipe( ialu_reg );
10460 %}
10461
10462 // XXX addP mem ops ????
10463
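// The cast nodes below normally emit no code; they exist only to carry type
// information for the optimizer. With VerifyConstraintCasts > 0 the checked
// variants instead emit a runtime check that the value lies within the
// node's type range.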
10464 instruct checkCastPP(rRegP dst)
10465 %{
10466 match(Set dst (CheckCastPP dst));
10467
10468 size(0);
10469 format %{ "# checkcastPP of $dst" %}
10470 ins_encode(/* empty encoding */);
10471 ins_pipe(empty);
10472 %}
10473
10474 instruct castPP(rRegP dst)
10475 %{
10476 match(Set dst (CastPP dst));
10477
10478 size(0);
10479 format %{ "# castPP of $dst" %}
10480 ins_encode(/* empty encoding */);
10481 ins_pipe(empty);
10482 %}
10483
10484 instruct castII(rRegI dst)
10485 %{
10486 predicate(VerifyConstraintCasts == 0);
10487 match(Set dst (CastII dst));
10488
10489 size(0);
10490 format %{ "# castII of $dst" %}
10491 ins_encode(/* empty encoding */);
10492 ins_cost(0);
10493 ins_pipe(empty);
10494 %}
10495
10496 instruct castII_checked(rRegI dst, rFlagsReg cr)
10497 %{
10498 predicate(VerifyConstraintCasts > 0);
10499 match(Set dst (CastII dst));
10500
10501 effect(KILL cr);
10502 format %{ "# cast_checked_II $dst" %}
10503 ins_encode %{
10504 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10505 %}
10506 ins_pipe(pipe_slow);
10507 %}
10508
10509 instruct castLL(rRegL dst)
10510 %{
10511 predicate(VerifyConstraintCasts == 0);
10512 match(Set dst (CastLL dst));
10513
10514 size(0);
10515 format %{ "# castLL of $dst" %}
10516 ins_encode(/* empty encoding */);
10517 ins_cost(0);
10518 ins_pipe(empty);
10519 %}
10520
10521 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10522 %{
10523 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10524 match(Set dst (CastLL dst));
10525
10526 effect(KILL cr);
10527 format %{ "# cast_checked_LL $dst" %}
10528 ins_encode %{
10529 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10530 %}
10531 ins_pipe(pipe_slow);
10532 %}
10533
10534 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10535 %{
10536 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10537 match(Set dst (CastLL dst));
10538
10539 effect(KILL cr, TEMP tmp);
10540 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10541 ins_encode %{
10542 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10543 %}
10544 ins_pipe(pipe_slow);
10545 %}
10546
10547 instruct castFF(regF dst)
10548 %{
10549 match(Set dst (CastFF dst));
10550
10551 size(0);
10552 format %{ "# castFF of $dst" %}
10553 ins_encode(/* empty encoding */);
10554 ins_cost(0);
10555 ins_pipe(empty);
10556 %}
10557
10558 instruct castHH(regF dst)
10559 %{
10560 match(Set dst (CastHH dst));
10561
10562 size(0);
10563 format %{ "# castHH of $dst" %}
10564 ins_encode(/* empty encoding */);
10565 ins_cost(0);
10566 ins_pipe(empty);
10567 %}
10568
10569 instruct castDD(regD dst)
10570 %{
10571 match(Set dst (CastDD dst));
10572
10573 size(0);
10574 format %{ "# castDD of $dst" %}
10575 ins_encode(/* empty encoding */);
10576 ins_cost(0);
10577 ins_pipe(empty);
10578 %}
10579
10580 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
10581 instruct compareAndSwapP(rRegI res,
10582 memory mem_ptr,
10583 rax_RegP oldval, rRegP newval,
10584 rFlagsReg cr)
10585 %{
10586 predicate(n->as_LoadStore()->barrier_data() == 0);
10587 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10588 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10589 effect(KILL cr, KILL oldval);
10590
10591 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10592 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10593 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10594 ins_encode %{
10595 __ lock();
10596 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10597 __ setcc(Assembler::equal, $res$$Register);
10598 %}
10599 ins_pipe( pipe_cmpxchg );
10600 %}
10601
10602 instruct compareAndSwapL(rRegI res,
10603 memory mem_ptr,
10604 rax_RegL oldval, rRegL newval,
10605 rFlagsReg cr)
10606 %{
10607 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10608 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10609 effect(KILL cr, KILL oldval);
10610
10611 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10612 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10613 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10614 ins_encode %{
10615 __ lock();
10616 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10617 __ setcc(Assembler::equal, $res$$Register);
10618 %}
10619 ins_pipe( pipe_cmpxchg );
10620 %}
10621
10622 instruct compareAndSwapI(rRegI res,
10623 memory mem_ptr,
10624 rax_RegI oldval, rRegI newval,
10625 rFlagsReg cr)
10626 %{
10627 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10628 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10629 effect(KILL cr, KILL oldval);
10630
10631 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10632 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10633 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10634 ins_encode %{
10635 __ lock();
10636 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10637 __ setcc(Assembler::equal, $res$$Register);
10638 %}
10639 ins_pipe( pipe_cmpxchg );
10640 %}
10641
10642 instruct compareAndSwapB(rRegI res,
10643 memory mem_ptr,
10644 rax_RegI oldval, rRegI newval,
10645 rFlagsReg cr)
10646 %{
10647 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10648 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10649 effect(KILL cr, KILL oldval);
10650
10651 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10652 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10653 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10654 ins_encode %{
10655 __ lock();
10656 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10657 __ setcc(Assembler::equal, $res$$Register);
10658 %}
10659 ins_pipe( pipe_cmpxchg );
10660 %}
10661
10662 instruct compareAndSwapS(rRegI res,
10663 memory mem_ptr,
10664 rax_RegI oldval, rRegI newval,
10665 rFlagsReg cr)
10666 %{
10667 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10668 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10669 effect(KILL cr, KILL oldval);
10670
10671 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10672 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10673 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10674 ins_encode %{
10675 __ lock();
10676 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10677 __ setcc(Assembler::equal, $res$$Register);
10678 %}
10679 ins_pipe( pipe_cmpxchg );
10680 %}
10681
10682 instruct compareAndSwapN(rRegI res,
10683 memory mem_ptr,
10684 rax_RegN oldval, rRegN newval,
10685 rFlagsReg cr) %{
10686 predicate(n->as_LoadStore()->barrier_data() == 0);
10687 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10688 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10689 effect(KILL cr, KILL oldval);
10690
10691 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10692 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10693 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10694 ins_encode %{
10695 __ lock();
10696 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10697 __ setcc(Assembler::equal, $res$$Register);
10698 %}
10699 ins_pipe( pipe_cmpxchg );
10700 %}
10701
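// The CompareAndSwap patterns follow the usual x86 protocol: the expected
// value is pinned in rax, lock cmpxchg compares it against memory and stores
// $newval on a match, ZF records success, and setcc materializes it as 0/1
// in $res. Strong and weak variants share one implementation because lock
// cmpxchg cannot fail spuriously on x86.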
10702 instruct compareAndExchangeB(
10703 memory mem_ptr,
10704 rax_RegI oldval, rRegI newval,
10705 rFlagsReg cr)
10706 %{
10707 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10708 effect(KILL cr);
10709
10710 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10711 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10712 ins_encode %{
10713 __ lock();
10714 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10715 %}
10716 ins_pipe( pipe_cmpxchg );
10717 %}
10718
10719 instruct compareAndExchangeS(
10720 memory mem_ptr,
10721 rax_RegI oldval, rRegI newval,
10722 rFlagsReg cr)
10723 %{
10724 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10725 effect(KILL cr);
10726
10727 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10728 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10729 ins_encode %{
10730 __ lock();
10731 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10732 %}
10733 ins_pipe( pipe_cmpxchg );
10734 %}
10735
10736 instruct compareAndExchangeI(
10737 memory mem_ptr,
10738 rax_RegI oldval, rRegI newval,
10739 rFlagsReg cr)
10740 %{
10741 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10742 effect(KILL cr);
10743
10744 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10745 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10746 ins_encode %{
10747 __ lock();
10748 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10749 %}
10750 ins_pipe( pipe_cmpxchg );
10751 %}
10752
10753 instruct compareAndExchangeL(
10754 memory mem_ptr,
10755 rax_RegL oldval, rRegL newval,
10756 rFlagsReg cr)
10757 %{
10758 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10759 effect(KILL cr);
10760
10761 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10762 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10763 ins_encode %{
10764 __ lock();
10765 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10766 %}
10767 ins_pipe( pipe_cmpxchg );
10768 %}
10769
10770 instruct compareAndExchangeN(
10771 memory mem_ptr,
10772 rax_RegN oldval, rRegN newval,
10773 rFlagsReg cr) %{
10774 predicate(n->as_LoadStore()->barrier_data() == 0);
10775 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10776 effect(KILL cr);
10777
10778 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10779 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10780 ins_encode %{
10781 __ lock();
10782 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10783 %}
10784 ins_pipe( pipe_cmpxchg );
10785 %}
10786
10787 instruct compareAndExchangeP(
10788 memory mem_ptr,
10789 rax_RegP oldval, rRegP newval,
10790 rFlagsReg cr)
10791 %{
10792 predicate(n->as_LoadStore()->barrier_data() == 0);
10793 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10794 effect(KILL cr);
10795
10796 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10797 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10798 ins_encode %{
10799 __ lock();
10800 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10801 %}
10802 ins_pipe( pipe_cmpxchg );
10803 %}
10804
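// The CompareAndExchange patterns differ from CompareAndSwap only in their
// result: lock cmpxchg already leaves the previous memory value in rax (the
// oldval operand), so no flag materialization is needed.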
10805 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10806 predicate(n->as_LoadStore()->result_not_used());
10807 match(Set dummy (GetAndAddB mem add));
10808 effect(KILL cr);
10809 format %{ "addb_lock $mem, $add" %}
10810 ins_encode %{
10811 __ lock();
10812 __ addb($mem$$Address, $add$$Register);
10813 %}
10814 ins_pipe(pipe_cmpxchg);
10815 %}
10816
10817 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10818 predicate(n->as_LoadStore()->result_not_used());
10819 match(Set dummy (GetAndAddB mem add));
10820 effect(KILL cr);
10821 format %{ "addb_lock $mem, $add" %}
10822 ins_encode %{
10823 __ lock();
10824 __ addb($mem$$Address, $add$$constant);
10825 %}
10826 ins_pipe(pipe_cmpxchg);
10827 %}
10828
10829 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10830 predicate(!n->as_LoadStore()->result_not_used());
10831 match(Set newval (GetAndAddB mem newval));
10832 effect(KILL cr);
10833 format %{ "xaddb_lock $mem, $newval" %}
10834 ins_encode %{
10835 __ lock();
10836 __ xaddb($mem$$Address, $newval$$Register);
10837 %}
10838 ins_pipe(pipe_cmpxchg);
10839 %}
10840
10841 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10842 predicate(n->as_LoadStore()->result_not_used());
10843 match(Set dummy (GetAndAddS mem add));
10844 effect(KILL cr);
10845 format %{ "addw_lock $mem, $add" %}
10846 ins_encode %{
10847 __ lock();
10848 __ addw($mem$$Address, $add$$Register);
10849 %}
10850 ins_pipe(pipe_cmpxchg);
10851 %}
10852
10853 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10854 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10855 match(Set dummy (GetAndAddS mem add));
10856 effect(KILL cr);
10857 format %{ "addw_lock $mem, $add" %}
10858 ins_encode %{
10859 __ lock();
10860 __ addw($mem$$Address, $add$$constant);
10861 %}
10862 ins_pipe(pipe_cmpxchg);
10863 %}
10864
10865 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10866 predicate(!n->as_LoadStore()->result_not_used());
10867 match(Set newval (GetAndAddS mem newval));
10868 effect(KILL cr);
10869 format %{ "xaddw_lock $mem, $newval" %}
10870 ins_encode %{
10871 __ lock();
10872 __ xaddw($mem$$Address, $newval$$Register);
10873 %}
10874 ins_pipe(pipe_cmpxchg);
10875 %}
10876
10877 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10878 predicate(n->as_LoadStore()->result_not_used());
10879 match(Set dummy (GetAndAddI mem add));
10880 effect(KILL cr);
10881 format %{ "addl_lock $mem, $add" %}
10882 ins_encode %{
10883 __ lock();
10884 __ addl($mem$$Address, $add$$Register);
10885 %}
10886 ins_pipe(pipe_cmpxchg);
10887 %}
10888
10889 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10890 predicate(n->as_LoadStore()->result_not_used());
10891 match(Set dummy (GetAndAddI mem add));
10892 effect(KILL cr);
10893 format %{ "addl_lock $mem, $add" %}
10894 ins_encode %{
10895 __ lock();
10896 __ addl($mem$$Address, $add$$constant);
10897 %}
10898 ins_pipe(pipe_cmpxchg);
10899 %}
10900
10901 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10902 predicate(!n->as_LoadStore()->result_not_used());
10903 match(Set newval (GetAndAddI mem newval));
10904 effect(KILL cr);
10905 format %{ "xaddl_lock $mem, $newval" %}
10906 ins_encode %{
10907 __ lock();
10908 __ xaddl($mem$$Address, $newval$$Register);
10909 %}
10910 ins_pipe(pipe_cmpxchg);
10911 %}
10912
10913 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10914 predicate(n->as_LoadStore()->result_not_used());
10915 match(Set dummy (GetAndAddL mem add));
10916 effect(KILL cr);
10917 format %{ "addq_lock $mem, $add" %}
10918 ins_encode %{
10919 __ lock();
10920 __ addq($mem$$Address, $add$$Register);
10921 %}
10922 ins_pipe(pipe_cmpxchg);
10923 %}
10924
10925 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10926 predicate(n->as_LoadStore()->result_not_used());
10927 match(Set dummy (GetAndAddL mem add));
10928 effect(KILL cr);
10929 format %{ "addq_lock $mem, $add" %}
10930 ins_encode %{
10931 __ lock();
10932 __ addq($mem$$Address, $add$$constant);
10933 %}
10934 ins_pipe(pipe_cmpxchg);
10935 %}
10936
10937 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10938 predicate(!n->as_LoadStore()->result_not_used());
10939 match(Set newval (GetAndAddL mem newval));
10940 effect(KILL cr);
10941 format %{ "xaddq_lock $mem, $newval" %}
10942 ins_encode %{
10943 __ lock();
10944 __ xaddq($mem$$Address, $newval$$Register);
10945 %}
10946 ins_pipe(pipe_cmpxchg);
10947 %}
10948
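// For GetAndAdd, when result_not_used() the fetched value is dead, so the
// patterns above emit a plain locked add (register or immediate form), which
// avoids the extra register write-back of xadd; "lock xadd" is reserved for
// the case where the old value is actually consumed.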
10949 instruct xchgB( memory mem, rRegI newval) %{
10950 match(Set newval (GetAndSetB mem newval));
10951 format %{ "XCHGB $newval,[$mem]" %}
10952 ins_encode %{
10953 __ xchgb($newval$$Register, $mem$$Address);
10954 %}
10955 ins_pipe( pipe_cmpxchg );
10956 %}
10957
10958 instruct xchgS( memory mem, rRegI newval) %{
10959 match(Set newval (GetAndSetS mem newval));
10960 format %{ "XCHGW $newval,[$mem]" %}
10961 ins_encode %{
10962 __ xchgw($newval$$Register, $mem$$Address);
10963 %}
10964 ins_pipe( pipe_cmpxchg );
10965 %}
10966
10967 instruct xchgI( memory mem, rRegI newval) %{
10968 match(Set newval (GetAndSetI mem newval));
10969 format %{ "XCHGL $newval,[$mem]" %}
10970 ins_encode %{
10971 __ xchgl($newval$$Register, $mem$$Address);
10972 %}
10973 ins_pipe( pipe_cmpxchg );
10974 %}
10975
10976 instruct xchgL( memory mem, rRegL newval) %{
10977 match(Set newval (GetAndSetL mem newval));
10978 format %{ "XCHGQ $newval,[$mem]" %}
10979 ins_encode %{
10980 __ xchgq($newval$$Register, $mem$$Address);
10981 %}
10982 ins_pipe( pipe_cmpxchg );
10983 %}
10984
10985 instruct xchgP( memory mem, rRegP newval) %{
10986 match(Set newval (GetAndSetP mem newval));
10987 predicate(n->as_LoadStore()->barrier_data() == 0);
10988 format %{ "XCHGQ $newval,[$mem]" %}
10989 ins_encode %{
10990 __ xchgq($newval$$Register, $mem$$Address);
10991 %}
10992 ins_pipe( pipe_cmpxchg );
10993 %}
10994
10995 instruct xchgN( memory mem, rRegN newval) %{
10996 predicate(n->as_LoadStore()->barrier_data() == 0);
10997 match(Set newval (GetAndSetN mem newval));
10998 format %{ "XCHGL $newval,[$mem]" %}
10999 ins_encode %{
11000 __ xchgl($newval$$Register, $mem$$Address);
11001 %}
11002 ins_pipe( pipe_cmpxchg );
11003 %}
11004
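// The xchg patterns emit no explicit lock prefix: an xchg with a memory
// operand is implicitly locked on x86, so the swap is atomic as-is.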
11005 //----------Abs Instructions-------------------------------------------
11006
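// Both patterns below use the branchless xor/sub/cmov idiom, roughly:
//
//   xorl   dst, dst     # dst = 0
//   subl   dst, src     # dst = -src; flags as for 0 - src
//   cmovll dst, src     # taken when 0 < src (signed), i.e. src positive
//
// yielding |src|. MIN_VALUE negates to itself (the sub overflows, so "less"
// is not taken), matching Java's abs() semantics. In the long variant, xorl
// suffices for zeroing: a 32-bit write clears the upper half of the register.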
11007 // Integer Absolute Instructions
11008 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11009 %{
11010 match(Set dst (AbsI src));
11011 effect(TEMP dst, KILL cr);
11012 format %{ "xorl $dst, $dst\t# abs int\n\t"
11013 "subl $dst, $src\n\t"
11014 "cmovll $dst, $src" %}
11015 ins_encode %{
11016 __ xorl($dst$$Register, $dst$$Register);
11017 __ subl($dst$$Register, $src$$Register);
11018 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11019 %}
11020
11021 ins_pipe(ialu_reg_reg);
11022 %}
11023
11024 // Long Absolute Instructions
11025 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11026 %{
11027 match(Set dst (AbsL src));
11028 effect(TEMP dst, KILL cr);
11029 format %{ "xorl $dst, $dst\t# abs long\n\t"
11030 "subq $dst, $src\n\t"
11031 "cmovlq $dst, $src" %}
11032 ins_encode %{
11033 __ xorl($dst$$Register, $dst$$Register);
11034 __ subq($dst$$Register, $src$$Register);
11035 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11036 %}
11037
11038 ins_pipe(ialu_reg_reg);
11039 %}
11040
11041 //----------Subtraction Instructions-------------------------------------------
11042
11043 // Integer Subtraction Instructions
11044 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11045 %{
11046 predicate(!UseAPX);
11047 match(Set dst (SubI dst src));
11048 effect(KILL cr);
11049 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11050
11051 format %{ "subl $dst, $src\t# int" %}
11052 ins_encode %{
11053 __ subl($dst$$Register, $src$$Register);
11054 %}
11055 ins_pipe(ialu_reg_reg);
11056 %}
11057
11058 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11059 %{
11060 predicate(UseAPX);
11061 match(Set dst (SubI src1 src2));
11062 effect(KILL cr);
11063 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11064
11065 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11066 ins_encode %{
11067 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11068 %}
11069 ins_pipe(ialu_reg_reg);
11070 %}
11071
11072 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11073 %{
11074 predicate(UseAPX);
11075 match(Set dst (SubI src1 src2));
11076 effect(KILL cr);
11077 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11078
11079 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11080 ins_encode %{
11081 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11082 %}
11083 ins_pipe(ialu_reg_reg);
11084 %}
11085
11086 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11087 %{
11088 predicate(UseAPX);
11089 match(Set dst (SubI (LoadI src1) src2));
11090 effect(KILL cr);
11091 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11092
11093 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11094 ins_encode %{
11095 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11096 %}
11097 ins_pipe(ialu_reg_reg);
11098 %}
11099
11100 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11101 %{
11102 predicate(!UseAPX);
11103 match(Set dst (SubI dst (LoadI src)));
11104 effect(KILL cr);
11105 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11106
11107 ins_cost(150);
11108 format %{ "subl $dst, $src\t# int" %}
11109 ins_encode %{
11110 __ subl($dst$$Register, $src$$Address);
11111 %}
11112 ins_pipe(ialu_reg_mem);
11113 %}
11114
11115 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11116 %{
11117 predicate(UseAPX);
11118 match(Set dst (SubI src1 (LoadI src2)));
11119 effect(KILL cr);
11120 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11121
11122 ins_cost(150);
11123 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11124 ins_encode %{
11125 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11126 %}
11127 ins_pipe(ialu_reg_mem);
11128 %}
11129
11130 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11131 %{
11132 predicate(UseAPX);
11133 match(Set dst (SubI (LoadI src1) src2));
11134 effect(KILL cr);
11135 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11136
11137 ins_cost(150);
11138 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11139 ins_encode %{
11140 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11141 %}
11142 ins_pipe(ialu_reg_mem);
11143 %}
11144
11145 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11146 %{
11147 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11148 effect(KILL cr);
11149 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11150
11151 ins_cost(150);
11152 format %{ "subl $dst, $src\t# int" %}
11153 ins_encode %{
11154 __ subl($dst$$Address, $src$$Register);
11155 %}
11156 ins_pipe(ialu_mem_reg);
11157 %}
11158
11159 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11160 %{
11161 predicate(!UseAPX);
11162 match(Set dst (SubL dst src));
11163 effect(KILL cr);
11164 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11165
11166 format %{ "subq $dst, $src\t# long" %}
11167 ins_encode %{
11168 __ subq($dst$$Register, $src$$Register);
11169 %}
11170 ins_pipe(ialu_reg_reg);
11171 %}
11172
11173 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11174 %{
11175 predicate(UseAPX);
11176 match(Set dst (SubL src1 src2));
11177 effect(KILL cr);
11178 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11179
11180 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11181 ins_encode %{
11182 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11183 %}
11184 ins_pipe(ialu_reg_reg);
11185 %}
11186
11187 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11188 %{
11189 predicate(UseAPX);
11190 match(Set dst (SubL src1 src2));
11191 effect(KILL cr);
11192 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11193
11194 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11195 ins_encode %{
11196 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11197 %}
11198 ins_pipe(ialu_reg_reg);
11199 %}
11200
11201 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11202 %{
11203 predicate(UseAPX);
11204 match(Set dst (SubL (LoadL src1) src2));
11205 effect(KILL cr);
11206 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11207
11208 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11209 ins_encode %{
11210 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11211 %}
11212 ins_pipe(ialu_reg_reg);
11213 %}
11214
11215 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11216 %{
11217 predicate(!UseAPX);
11218 match(Set dst (SubL dst (LoadL src)));
11219 effect(KILL cr);
11220 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11221
11222 ins_cost(150);
11223 format %{ "subq $dst, $src\t# long" %}
11224 ins_encode %{
11225 __ subq($dst$$Register, $src$$Address);
11226 %}
11227 ins_pipe(ialu_reg_mem);
11228 %}
11229
11230 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11231 %{
11232 predicate(UseAPX);
11233 match(Set dst (SubL src1 (LoadL src2)));
11234 effect(KILL cr);
11235 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11236
11237 ins_cost(150);
11238 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11239 ins_encode %{
11240 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11241 %}
11242 ins_pipe(ialu_reg_mem);
11243 %}
11244
11245 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11246 %{
11247 predicate(UseAPX);
11248 match(Set dst (SubL (LoadL src1) src2));
11249 effect(KILL cr);
11250 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11251
11252 ins_cost(150);
11253 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11254 ins_encode %{
11255 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11256 %}
11257 ins_pipe(ialu_reg_mem);
11258 %}
11259
11260 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11261 %{
11262 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11263 effect(KILL cr);
11264 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11265
11266 ins_cost(150);
11267 format %{ "subq $dst, $src\t# long" %}
11268 ins_encode %{
11269 __ subq($dst$$Address, $src$$Register);
11270 %}
11271 ins_pipe(ialu_mem_reg);
11272 %}
11273
11274 // Subtract from a pointer
11275 // XXX hmpf???
11276 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11277 %{
11278 match(Set dst (AddP dst (SubI zero src)));
11279 effect(KILL cr);
11280
11281 format %{ "subq $dst, $src\t# ptr - int" %}
11282 ins_encode %{
11283 __ subq($dst$$Register, $src$$Register);
11284 %}
11285 ins_pipe(ialu_reg_reg);
11286 %}
11287
11288 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11289 %{
11290 predicate(!UseAPX);
11291 match(Set dst (SubI zero dst));
11292 effect(KILL cr);
11293 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11294
11295 format %{ "negl $dst\t# int" %}
11296 ins_encode %{
11297 __ negl($dst$$Register);
11298 %}
11299 ins_pipe(ialu_reg);
11300 %}
11301
11302 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11303 %{
11304 predicate(UseAPX);
11305 match(Set dst (SubI zero src));
11306 effect(KILL cr);
11307 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11308
11309 format %{ "enegl $dst, $src\t# int ndd" %}
11310 ins_encode %{
11311 __ enegl($dst$$Register, $src$$Register, false);
11312 %}
11313 ins_pipe(ialu_reg);
11314 %}
11315
11316 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11317 %{
11318 predicate(!UseAPX);
11319 match(Set dst (NegI dst));
11320 effect(KILL cr);
11321 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11322
11323 format %{ "negl $dst\t# int" %}
11324 ins_encode %{
11325 __ negl($dst$$Register);
11326 %}
11327 ins_pipe(ialu_reg);
11328 %}
11329
11330 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11331 %{
11332 predicate(UseAPX);
11333 match(Set dst (NegI src));
11334 effect(KILL cr);
11335 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11336
11337 format %{ "enegl $dst, $src\t# int ndd" %}
11338 ins_encode %{
11339 __ enegl($dst$$Register, $src$$Register, false);
11340 %}
11341 ins_pipe(ialu_reg);
11342 %}
11343
11344 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11345 %{
11346 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11347 effect(KILL cr);
11348 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11349
11350 format %{ "negl $dst\t# int" %}
11351 ins_encode %{
11352 __ negl($dst$$Address);
11353 %}
11354 ins_pipe(ialu_reg);
11355 %}
11356
11357 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11358 %{
11359 predicate(!UseAPX);
11360 match(Set dst (SubL zero dst));
11361 effect(KILL cr);
11362 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11363
11364 format %{ "negq $dst\t# long" %}
11365 ins_encode %{
11366 __ negq($dst$$Register);
11367 %}
11368 ins_pipe(ialu_reg);
11369 %}
11370
11371 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11372 %{
11373 predicate(UseAPX);
11374 match(Set dst (SubL zero src));
11375 effect(KILL cr);
11376 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11377
11378 format %{ "enegq $dst, $src\t# long ndd" %}
11379 ins_encode %{
11380 __ enegq($dst$$Register, $src$$Register, false);
11381 %}
11382 ins_pipe(ialu_reg);
11383 %}
11384
11385 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11386 %{
11387 predicate(!UseAPX);
11388 match(Set dst (NegL dst));
11389 effect(KILL cr);
11390 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11391
11392 format %{ "negq $dst\t# long" %}
11393 ins_encode %{
11394 __ negq($dst$$Register);
11395 %}
11396 ins_pipe(ialu_reg);
11397 %}
11398
11399 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11400 %{
11401 predicate(UseAPX);
11402 match(Set dst (NegL src));
11403 effect(KILL cr);
11404 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11405
11406 format %{ "enegq $dst, $src\t# long ndd" %}
11407 ins_encode %{
11408 __ enegq($dst$$Register, $src$$Register, false);
11409 %}
11410 ins_pipe(ialu_reg);
11411 %}
11412
11413 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11414 %{
11415 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11416 effect(KILL cr);
11417 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11418
11419 format %{ "negq $dst\t# long" %}
11420 ins_encode %{
11421 __ negq($dst$$Address);
11422 %}
11423 ins_pipe(ialu_reg);
11424 %}
11425
11426 //----------Multiplication/Division Instructions-------------------------------
11427 // Integer Multiplication Instructions
11428 // Multiply Register
11429
11430 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11431 %{
11432 predicate(!UseAPX);
11433 match(Set dst (MulI dst src));
11434 effect(KILL cr);
11435
11436 ins_cost(300);
11437 format %{ "imull $dst, $src\t# int" %}
11438 ins_encode %{
11439 __ imull($dst$$Register, $src$$Register);
11440 %}
11441 ins_pipe(ialu_reg_reg_alu0);
11442 %}
11443
11444 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11445 %{
11446 predicate(UseAPX);
11447 match(Set dst (MulI src1 src2));
11448 effect(KILL cr);
11449 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11450
11451 ins_cost(300);
11452 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11453 ins_encode %{
11454 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11455 %}
11456 ins_pipe(ialu_reg_reg_alu0);
11457 %}
11458
11459 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11460 %{
11461 match(Set dst (MulI src imm));
11462 effect(KILL cr);
11463
11464 ins_cost(300);
11465 format %{ "imull $dst, $src, $imm\t# int" %}
11466 ins_encode %{
11467 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11468 %}
11469 ins_pipe(ialu_reg_reg_alu0);
11470 %}
11471
11472 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11473 %{
11474 predicate(!UseAPX);
11475 match(Set dst (MulI dst (LoadI src)));
11476 effect(KILL cr);
11477
11478 ins_cost(350);
11479 format %{ "imull $dst, $src\t# int" %}
11480 ins_encode %{
11481 __ imull($dst$$Register, $src$$Address);
11482 %}
11483 ins_pipe(ialu_reg_mem_alu0);
11484 %}
11485
11486 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11487 %{
11488 predicate(UseAPX);
11489 match(Set dst (MulI src1 (LoadI src2)));
11490 effect(KILL cr);
11491 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11492
11493 ins_cost(350);
11494 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11495 ins_encode %{
11496 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11497 %}
11498 ins_pipe(ialu_reg_mem_alu0);
11499 %}
11500
11501 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11502 %{
11503 match(Set dst (MulI (LoadI src) imm));
11504 effect(KILL cr);
11505
11506 ins_cost(300);
11507 format %{ "imull $dst, $src, $imm\t# int" %}
11508 ins_encode %{
11509 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11510 %}
11511 ins_pipe(ialu_reg_mem_alu0);
11512 %}
11513
11514 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11515 %{
11516 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11517 effect(KILL cr, KILL src2);
11518
11519 expand %{ mulI_rReg(dst, src1, cr);
11520 mulI_rReg(src2, src3, cr);
11521 addI_rReg(dst, src2, cr); %}
11522 %}
11523
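// MulAddS2I computes dst*src1 + src2*src3. The expand above simply reuses
// the existing imull/addl patterns (dst *= src1; src2 *= src3; dst += src2),
// which is why src2 is KILLed.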
11524 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11525 %{
11526 predicate(!UseAPX);
11527 match(Set dst (MulL dst src));
11528 effect(KILL cr);
11529
11530 ins_cost(300);
11531 format %{ "imulq $dst, $src\t# long" %}
11532 ins_encode %{
11533 __ imulq($dst$$Register, $src$$Register);
11534 %}
11535 ins_pipe(ialu_reg_reg_alu0);
11536 %}
11537
11538 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11539 %{
11540 predicate(UseAPX);
11541 match(Set dst (MulL src1 src2));
11542 effect(KILL cr);
11543 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11544
11545 ins_cost(300);
11546 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11547 ins_encode %{
11548 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11549 %}
11550 ins_pipe(ialu_reg_reg_alu0);
11551 %}
11552
11553 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11554 %{
11555 match(Set dst (MulL src imm));
11556 effect(KILL cr);
11557
11558 ins_cost(300);
11559 format %{ "imulq $dst, $src, $imm\t# long" %}
11560 ins_encode %{
11561 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11562 %}
11563 ins_pipe(ialu_reg_reg_alu0);
11564 %}
11565
11566 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11567 %{
11568 predicate(!UseAPX);
11569 match(Set dst (MulL dst (LoadL src)));
11570 effect(KILL cr);
11571
11572 ins_cost(350);
11573 format %{ "imulq $dst, $src\t# long" %}
11574 ins_encode %{
11575 __ imulq($dst$$Register, $src$$Address);
11576 %}
11577 ins_pipe(ialu_reg_mem_alu0);
11578 %}
11579
11580 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11581 %{
11582 predicate(UseAPX);
11583 match(Set dst (MulL src1 (LoadL src2)));
11584 effect(KILL cr);
11585 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11586
11587 ins_cost(350);
11588 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11589 ins_encode %{
11590 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11591 %}
11592 ins_pipe(ialu_reg_mem_alu0);
11593 %}
11594
11595 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11596 %{
11597 match(Set dst (MulL (LoadL src) imm));
11598 effect(KILL cr);
11599
11600 ins_cost(300);
11601 format %{ "imulq $dst, $src, $imm\t# long" %}
11602 ins_encode %{
11603 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11604 %}
11605 ins_pipe(ialu_reg_mem_alu0);
11606 %}
11607
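// The high-half multiplies below use the one-operand imulq/mulq forms, which
// leave the full 128-bit product in RDX:RAX; the second input is pinned to
// RAX and the high 64 bits are read from RDX.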
11608 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11609 %{
11610 match(Set dst (MulHiL src rax));
11611 effect(USE_KILL rax, KILL cr);
11612
11613 ins_cost(300);
11614 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11615 ins_encode %{
11616 __ imulq($src$$Register);
11617 %}
11618 ins_pipe(ialu_reg_reg_alu0);
11619 %}
11620
11621 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11622 %{
11623 match(Set dst (UMulHiL src rax));
11624 effect(USE_KILL rax, KILL cr);
11625
11626 ins_cost(300);
11627 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11628 ins_encode %{
11629 __ mulq($src$$Register);
11630 %}
11631 ins_pipe(ialu_reg_reg_alu0);
11632 %}
11633
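// x86 idiv raises #DE not only on division by zero but also on overflow,
// i.e. min_jint / -1 (resp. min_jlong / -1). The cdql_enc/cdqq_enc encodings
// used below therefore test for that single case first and short-circuit
// with quotient = dividend and remainder = 0, as Java semantics require,
// before sign-extending (cdq) and dividing.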
11634 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11635 rFlagsReg cr)
11636 %{
11637 match(Set rax (DivI rax div));
11638 effect(KILL rdx, KILL cr);
11639
11640 ins_cost(30*100+10*100); // XXX
11641 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11642 "jne,s normal\n\t"
11643 "xorl rdx, rdx\n\t"
11644 "cmpl $div, -1\n\t"
11645 "je,s done\n"
11646 "normal: cdql\n\t"
11647 "idivl $div\n"
11648 "done:" %}
11649 ins_encode(cdql_enc(div));
11650 ins_pipe(ialu_reg_reg_alu0);
11651 %}
11652
11653 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11654 rFlagsReg cr)
11655 %{
11656 match(Set rax (DivL rax div));
11657 effect(KILL rdx, KILL cr);
11658
11659 ins_cost(30*100+10*100); // XXX
11660 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11661 "cmpq rax, rdx\n\t"
11662 "jne,s normal\n\t"
11663 "xorl rdx, rdx\n\t"
11664 "cmpq $div, -1\n\t"
11665 "je,s done\n"
11666 "normal: cdqq\n\t"
11667 "idivq $div\n"
11668 "done:" %}
11669 ins_encode(cdqq_enc(div));
11670 ins_pipe(ialu_reg_reg_alu0);
11671 %}
11672
11673 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11674 %{
11675 match(Set rax (UDivI rax div));
11676 effect(KILL rdx, KILL cr);
11677
11678 ins_cost(300);
11679 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11680 ins_encode %{
11681 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11682 %}
11683 ins_pipe(ialu_reg_reg_alu0);
11684 %}
11685
11686 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11687 %{
11688 match(Set rax (UDivL rax div));
11689 effect(KILL rdx, KILL cr);
11690
11691 ins_cost(300);
11692 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11693 ins_encode %{
11694 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11695 %}
11696 ins_pipe(ialu_reg_reg_alu0);
11697 %}
11698
11699 // Integer DIVMOD with Register, both quotient and mod results
11700 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11701 rFlagsReg cr)
11702 %{
11703 match(DivModI rax div);
11704 effect(KILL cr);
11705
11706 ins_cost(30*100+10*100); // XXX
11707 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11708 "jne,s normal\n\t"
11709 "xorl rdx, rdx\n\t"
11710 "cmpl $div, -1\n\t"
11711 "je,s done\n"
11712 "normal: cdql\n\t"
11713 "idivl $div\n"
11714 "done:" %}
11715 ins_encode(cdql_enc(div));
11716 ins_pipe(pipe_slow);
11717 %}
11718
11719 // Long DIVMOD with Register, both quotient and mod results
11720 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11721 rFlagsReg cr)
11722 %{
11723 match(DivModL rax div);
11724 effect(KILL cr);
11725
11726 ins_cost(30*100+10*100); // XXX
11727 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11728 "cmpq rax, rdx\n\t"
11729 "jne,s normal\n\t"
11730 "xorl rdx, rdx\n\t"
11731 "cmpq $div, -1\n\t"
11732 "je,s done\n"
11733 "normal: cdqq\n\t"
11734 "idivq $div\n"
11735 "done:" %}
11736 ins_encode(cdqq_enc(div));
11737 ins_pipe(pipe_slow);
11738 %}
11739
11740 // Unsigned integer DIVMOD with Register, both quotient and mod results
11741 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11742 no_rax_rdx_RegI div, rFlagsReg cr)
11743 %{
11744 match(UDivModI rax div);
11745 effect(TEMP tmp, KILL cr);
11746
11747 ins_cost(300);
11748 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11749 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11750 %}
11751 ins_encode %{
11752 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11753 %}
11754 ins_pipe(pipe_slow);
11755 %}
11756
11757 // Unsigned long DIVMOD with Register, both quotient and mod results
11758 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11759 no_rax_rdx_RegL div, rFlagsReg cr)
11760 %{
11761 match(UDivModL rax div);
11762 effect(TEMP tmp, KILL cr);
11763
11764 ins_cost(300);
11765 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11766 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11767 %}
11768 ins_encode %{
11769 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11770 %}
11771 ins_pipe(pipe_slow);
11772 %}
11773
11774 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11775 rFlagsReg cr)
11776 %{
11777 match(Set rdx (ModI rax div));
11778 effect(KILL rax, KILL cr);
11779
11780 ins_cost(300); // XXX
11781 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11782 "jne,s normal\n\t"
11783 "xorl rdx, rdx\n\t"
11784 "cmpl $div, -1\n\t"
11785 "je,s done\n"
11786 "normal: cdql\n\t"
11787 "idivl $div\n"
11788 "done:" %}
11789 ins_encode(cdql_enc(div));
11790 ins_pipe(ialu_reg_reg_alu0);
11791 %}
11792
11793 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11794 rFlagsReg cr)
11795 %{
11796 match(Set rdx (ModL rax div));
11797 effect(KILL rax, KILL cr);
11798
11799 ins_cost(300); // XXX
11800 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11801 "cmpq rax, rdx\n\t"
11802 "jne,s normal\n\t"
11803 "xorl rdx, rdx\n\t"
11804 "cmpq $div, -1\n\t"
11805 "je,s done\n"
11806 "normal: cdqq\n\t"
11807 "idivq $div\n"
11808 "done:" %}
11809 ins_encode(cdqq_enc(div));
11810 ins_pipe(ialu_reg_reg_alu0);
11811 %}
11812
11813 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11814 %{
11815 match(Set rdx (UModI rax div));
11816 effect(KILL rax, KILL cr);
11817
11818 ins_cost(300);
11819 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11820 ins_encode %{
11821 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11822 %}
11823 ins_pipe(ialu_reg_reg_alu0);
11824 %}
11825
11826 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11827 %{
11828 match(Set rdx (UModL rax div));
11829 effect(KILL rax, KILL cr);
11830
11831 ins_cost(300);
11832 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11833 ins_encode %{
11834 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11835 %}
11836 ins_pipe(ialu_reg_reg_alu0);
11837 %}
11838
11839 // Integer Shift Instructions
11840 // Shift Left by one, two, three
11841 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11842 %{
11843 predicate(!UseAPX);
11844 match(Set dst (LShiftI dst shift));
11845 effect(KILL cr);
11846
11847 format %{ "sall $dst, $shift" %}
11848 ins_encode %{
11849 __ sall($dst$$Register, $shift$$constant);
11850 %}
11851 ins_pipe(ialu_reg);
11852 %}
11853
11854 // Shift Left by one, two, three
11855 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11856 %{
11857 predicate(UseAPX);
11858 match(Set dst (LShiftI src shift));
11859 effect(KILL cr);
11860 flag(PD::Flag_ndd_demotable_opr1);
11861
11862 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11863 ins_encode %{
11864 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11865 %}
11866 ins_pipe(ialu_reg);
11867 %}
11868
11869 // Shift Left by 8-bit immediate
11870 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11871 %{
11872 predicate(!UseAPX);
11873 match(Set dst (LShiftI dst shift));
11874 effect(KILL cr);
11875
11876 format %{ "sall $dst, $shift" %}
11877 ins_encode %{
11878 __ sall($dst$$Register, $shift$$constant);
11879 %}
11880 ins_pipe(ialu_reg);
11881 %}
11882
11883 // Shift Left by 8-bit immediate
11884 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11885 %{
11886 predicate(UseAPX);
11887 match(Set dst (LShiftI src shift));
11888 effect(KILL cr);
11889 flag(PD::Flag_ndd_demotable_opr1);
11890
11891 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11892 ins_encode %{
11893 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11894 %}
11895 ins_pipe(ialu_reg);
11896 %}
11897
11898 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11899 %{
11900 predicate(UseAPX);
11901 match(Set dst (LShiftI (LoadI src) shift));
11902 effect(KILL cr);
11903
11904 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11905 ins_encode %{
11906 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11907 %}
11908 ins_pipe(ialu_reg);
11909 %}
11910
11911 // Shift Left by 8-bit immediate
11912 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11913 %{
11914 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11915 effect(KILL cr);
11916
11917 format %{ "sall $dst, $shift" %}
11918 ins_encode %{
11919 __ sall($dst$$Address, $shift$$constant);
11920 %}
11921 ins_pipe(ialu_mem_imm);
11922 %}
11923
11924 // Shift Left by variable
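// The rcx_RegI operand pins the count to CL: the legacy variable-shift
// encodings take their count implicitly in CL, which is why the masm calls
// below pass only the destination.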
11925 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11926 %{
11927 predicate(!VM_Version::supports_bmi2());
11928 match(Set dst (LShiftI dst shift));
11929 effect(KILL cr);
11930
11931 format %{ "sall $dst, $shift" %}
11932 ins_encode %{
11933 __ sall($dst$$Register);
11934 %}
11935 ins_pipe(ialu_reg_reg);
11936 %}
11937
11938 // Shift Left by variable
11939 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11940 %{
11941 predicate(!VM_Version::supports_bmi2());
11942 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11943 effect(KILL cr);
11944
11945 format %{ "sall $dst, $shift" %}
11946 ins_encode %{
11947 __ sall($dst$$Address);
11948 %}
11949 ins_pipe(ialu_mem_reg);
11950 %}
11951
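// With BMI2, shlx/sarx/shrx take the count in any register and neither read
// nor write EFLAGS, so the patterns below need no rcx pinning and no
// effect(KILL cr).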
11952 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11953 %{
11954 predicate(VM_Version::supports_bmi2());
11955 match(Set dst (LShiftI src shift));
11956
11957 format %{ "shlxl $dst, $src, $shift" %}
11958 ins_encode %{
11959 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
11960 %}
11961 ins_pipe(ialu_reg_reg);
11962 %}
11963
11964 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
11965 %{
11966 predicate(VM_Version::supports_bmi2());
11967 match(Set dst (LShiftI (LoadI src) shift));
11968 ins_cost(175);
11969 format %{ "shlxl $dst, $src, $shift" %}
11970 ins_encode %{
11971 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
11972 %}
11973 ins_pipe(ialu_reg_mem);
11974 %}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long Shift Instructions
// Shift Left by one, two, three
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by one, two, three
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Shift Left by variable
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Logical Shift Right by 8-bit immediate
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
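
// Illustrative note: shifting left by 24 and arithmetically back right by 24
// sign-extends the low byte, which is exactly what movsbl does in one
// instruction. For example, with x = 0x000000FF:
//   (x << 24) >> 24  ==  0xFFFFFFFF (-1)  ==  movsbl of the low byte 0xFF.
// The i2s rule below is the same trick for the low 16 bits via movswl.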

// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// ROL/ROR instructions

// Rotate left by constant.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
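
// Illustrative note on the rewrite above: there is no immediate-count rolx,
// so a left rotate by s is emitted as rorxl with count 32 - (s & 31), using
// the identity rol(x, s) == ror(x, (32 - s) & 31). For example, with
// x = 0x80000001 and s = 1: rol(x, 1) == 0x00000003 == ror(x, 31). rorx
// leaves EFLAGS untouched, so no KILL cr is needed. The 64-bit rolL rules
// below apply the same identity with 64 - (s & 63).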

instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by variable
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    __ eroll($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
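
// As with the legacy variable rotates, the rotate count in the NDD variable
// forms is implicitly CL (the shift operand is typed rcx_RegI), which is why
// the eroll/erorl/erolq/erorq encodings pass no explicit count argument.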

// Rotate Right by constant.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate Right by constant.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by variable
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
  ins_encode %{
    __ erorl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by constant.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by variable
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
  ins_encode %{
    __ erolq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by constant.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate Right by constant
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by variable
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
  ins_encode %{
    __ erorq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
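
// Illustrative note: pext gathers the src bits selected by mask and packs
// them at the low end; pdep is the inverse, scattering the low bits of src
// into the mask positions. For example (4-bit view):
//   pext(0b1011, 0b0110) == 0b01     // bits 1 and 2 of src, packed low
//   pdep(0b01,   0b0110) == 0b0010   // low bits deposited at bits 1 and 2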

instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}


// Logical Instructions

// Integer Logical Instructions

// And Instructions
// And Register with Register
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Register using New Data Destination (NDD)
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
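
// A note on the NDD (new data destination) rules in this file: with UseAPX,
// the EVEX-encoded e* forms take a destination separate from both sources,
// saving the copy that the two-operand legacy encodings would need. The
// Flag_ndd_demotable_opr* hints mark operands whose coincidence with dst
// allows demotion back to the shorter legacy encoding. The trailing 'false'
// argument to the e* assembler calls requests the flag-updating variant
// (consistent with the KILL cr effects) rather than the APX no-flags form.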

// And Register with Immediate 255
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
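
// Illustrative note: and-with-0xFF is just a zero-extending byte move, so
// movzbl implements it without writing EFLAGS (hence no KILL cr). Since it
// also clears the upper 32 bits of the destination, the ConvI2L variants
// below can reuse the same encoding.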

// And Register with Immediate 255 and promote to long
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535 and promote to long
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Can skip int2long conversions after AND with small bitmask
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
  ins_encode %{
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
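
// Illustrative note: bzhiq dst, src, idx copies src to dst with every bit
// position >= idx zeroed (idx is taken from the low byte of the register).
// Since mask is 2^k - 1, exact_log2(mask + 1) recovers k, so the bzhiq above
// keeps exactly bits 0..k-1. E.g. mask = 0xFF gives k = 8: bits 8..63 are
// cleared, and the zeroed upper half makes the ConvI2L come for free.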

// And Register with Immediate
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Memory
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// And Memory with Register
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// BMI1 instructions
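// Illustrative note on the BMI1 patterns below (x any nonzero value):
//   andn(a, b) == ~a & b          // matched here as (a ^ -1) & b
//   blsi(x)    == x & -x          // isolate lowest set bit
//   blsmsk(x)  == x ^ (x - 1)     // mask up to and including lowest set bit
//   blsr(x)    == x & (x - 1)     // clear lowest set bit
// E.g. x = 0b1100: blsi -> 0b0100, blsmsk -> 0b0111, blsr -> 0b1000.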
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnl $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src)) (LoadI src)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsil $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg);
%}

// Or Instructions
// Or Register with Register
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Register using New Data Destination (NDD)
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Immediate
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Or Register with Memory
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Or Memory with Register
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Register using New Data Destination (NDD)
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Immediate -1
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst imm));

  format %{ "notl $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
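
// Illustrative note: x ^ -1 == ~x, and NOT does not write EFLAGS, which is
// why the two not-rules here need no KILL cr while the general xor rules do.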

instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  match(Set dst (XorI src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Immediate
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Memory with Immediate
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI (LoadI src1) src2));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Memory
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Xor Memory with Register
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}


// Long Logical Instructions

// And Instructions
// And Register with Register
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Register using New Data Destination (NDD)
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Immediate 255
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // movzbl zeroes out the upper 32 bits and does not need REX.W
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // movzwl zeroes out the upper 32 bits and does not need REX.W
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Memory
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// And Memory with Register
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
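
// Illustrative note: when ~con is a single power of two above bit 30, the
// AND form would need a full 64-bit immediate (it no longer fits in immL32),
// so btrq clears that one bit directly in memory instead. E.g. for
// con == ~(1L << 40), log2i_exact(~con) == 40 and btrq resets bit 40.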
13739
13740 // BMI1 instructions
13741 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13742 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13743 predicate(UseBMI1Instructions);
13744 effect(KILL cr);
13745 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13746
13747 ins_cost(125);
13748 format %{ "andnq $dst, $src1, $src2" %}
13749
13750 ins_encode %{
13751 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13752 %}
13753 ins_pipe(ialu_reg_mem);
13754 %}
13755
13756 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13757 match(Set dst (AndL (XorL src1 minus_1) src2));
13758 predicate(UseBMI1Instructions);
13759 effect(KILL cr);
13760 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13761
13762 format %{ "andnq $dst, $src1, $src2" %}
13763
13764 ins_encode %{
13765 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13766 %}
13767 ins_pipe(ialu_reg_mem);
13768 %}
13769
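// blsi isolates the lowest set bit: dst = src & (-src),
// e.g. src = 0b0110100 -> dst = 0b0000100.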
13770 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13771 match(Set dst (AndL (SubL imm_zero src) src));
13772 predicate(UseBMI1Instructions);
13773 effect(KILL cr);
13774 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13775
13776 format %{ "blsiq $dst, $src" %}
13777
13778 ins_encode %{
13779 __ blsiq($dst$$Register, $src$$Register);
13780 %}
13781 ins_pipe(ialu_reg);
13782 %}
13783
13784 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13785 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13786 predicate(UseBMI1Instructions);
13787 effect(KILL cr);
13788 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13789
13790 ins_cost(125);
13791 format %{ "blsiq $dst, $src" %}
13792
13793 ins_encode %{
13794 __ blsiq($dst$$Register, $src$$Address);
13795 %}
13796 ins_pipe(ialu_reg_mem);
13797 %}
13798
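// blsmsk builds a mask up to and including the lowest set bit:
// dst = src ^ (src - 1), e.g. src = 0b0110100 -> dst = 0b0000111.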
13799 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13800 %{
13801 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13802 predicate(UseBMI1Instructions);
13803 effect(KILL cr);
13804 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13805
13806 ins_cost(125);
13807 format %{ "blsmskq $dst, $src" %}
13808
13809 ins_encode %{
13810 __ blsmskq($dst$$Register, $src$$Address);
13811 %}
13812 ins_pipe(ialu_reg_mem);
13813 %}
13814
13815 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13816 %{
13817 match(Set dst (XorL (AddL src minus_1) src));
13818 predicate(UseBMI1Instructions);
13819 effect(KILL cr);
13820 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13821
13822 format %{ "blsmskq $dst, $src" %}
13823
13824 ins_encode %{
13825 __ blsmskq($dst$$Register, $src$$Register);
13826 %}
13827
13828 ins_pipe(ialu_reg);
13829 %}
13830
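// blsr clears the lowest set bit: dst = src & (src - 1),
// e.g. src = 0b0110100 -> dst = 0b0110000.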
13831 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13832 %{
13833 match(Set dst (AndL (AddL src minus_1) src) );
13834 predicate(UseBMI1Instructions);
13835 effect(KILL cr);
13836 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13837
13838 format %{ "blsrq $dst, $src" %}
13839
13840 ins_encode %{
13841 __ blsrq($dst$$Register, $src$$Register);
13842 %}
13843
13844 ins_pipe(ialu_reg);
13845 %}
13846
13847 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13848 %{
13849 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13850 predicate(UseBMI1Instructions);
13851 effect(KILL cr);
13852 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13853
13854 ins_cost(125);
13855 format %{ "blsrq $dst, $src" %}
13856
13857 ins_encode %{
13858 __ blsrq($dst$$Register, $src$$Address);
13859 %}
13860
13861 ins_pipe(ialu_reg);
13862 %}
13863
13864 // Or Instructions
13865 // Or Register with Register
13866 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13867 %{
13868 predicate(!UseAPX);
13869 match(Set dst (OrL dst src));
13870 effect(KILL cr);
13871 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13872
13873 format %{ "orq $dst, $src\t# long" %}
13874 ins_encode %{
13875 __ orq($dst$$Register, $src$$Register);
13876 %}
13877 ins_pipe(ialu_reg_reg);
13878 %}
13879
13880 // Or Register with Register using New Data Destination (NDD)
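// APX NDD forms carry a separate EVEX-encoded destination, so
// "eorq dst, src1, src2" computes dst = src1 | src2 without first copying
// src1 into dst. The Flag_ndd_demotable_opr* hints mark operands that, when
// the allocator assigns them the same register as dst, allow the encoding
// to be demoted back to the shorter legacy two-operand form.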
13881 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13882 %{
13883 predicate(UseAPX);
13884 match(Set dst (OrL src1 src2));
13885 effect(KILL cr);
13886 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13887
13888 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13889 ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
13893 ins_pipe(ialu_reg_reg);
13894 %}
13895
13896 // Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
13899 effect(KILL cr);
13900 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13901
13902 format %{ "orq $dst, $src\t# long" %}
13903 ins_encode %{
13904 __ orq($dst$$Register, $src$$Register);
13905 %}
13906 ins_pipe(ialu_reg_reg);
13907 %}
13908
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
13911 effect(KILL cr);
13912 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13913
13914 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13915 ins_encode %{
13916 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13917 %}
13918 ins_pipe(ialu_reg_reg);
13919 %}
13920
13921 // Or Register with Immediate
13922 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13923 %{
13924 predicate(!UseAPX);
13925 match(Set dst (OrL dst src));
13926 effect(KILL cr);
13927 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13928
13929 format %{ "orq $dst, $src\t# long" %}
13930 ins_encode %{
13931 __ orq($dst$$Register, $src$$constant);
13932 %}
13933 ins_pipe(ialu_reg);
13934 %}
13935
13936 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13937 %{
13938 predicate(UseAPX);
13939 match(Set dst (OrL src1 src2));
13940 effect(KILL cr);
13941 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13942
13943 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13944 ins_encode %{
13945 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13946 %}
13947 ins_pipe(ialu_reg);
13948 %}
13949
13950 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13951 %{
13952 predicate(UseAPX);
13953 match(Set dst (OrL src1 src2));
13954 effect(KILL cr);
13955 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13956
13957 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
13958 ins_encode %{
13959 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
13960 %}
13961 ins_pipe(ialu_reg);
13962 %}
13963
// Or Memory with Immediate into Register (NDD)
13965 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13966 %{
13967 predicate(UseAPX);
13968 match(Set dst (OrL (LoadL src1) src2));
13969 effect(KILL cr);
13970 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13971
13972 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13973 ins_encode %{
13974 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
13975 %}
13976 ins_pipe(ialu_reg);
13977 %}
13978
13979 // Or Register with Memory
13980 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13981 %{
13982 predicate(!UseAPX);
13983 match(Set dst (OrL dst (LoadL src)));
13984 effect(KILL cr);
13985 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13986
13987 ins_cost(150);
13988 format %{ "orq $dst, $src\t# long" %}
13989 ins_encode %{
13990 __ orq($dst$$Register, $src$$Address);
13991 %}
13992 ins_pipe(ialu_reg_mem);
13993 %}
13994
13995 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13996 %{
13997 predicate(UseAPX);
13998 match(Set dst (OrL src1 (LoadL src2)));
13999 effect(KILL cr);
14000 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14001
14002 ins_cost(150);
14003 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14004 ins_encode %{
14005 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14006 %}
14007 ins_pipe(ialu_reg_mem);
14008 %}
14009
14010 // Or Memory with Register
14011 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14012 %{
14013 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14014 effect(KILL cr);
14015 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14016
14017 ins_cost(150);
14018 format %{ "orq $dst, $src\t# long" %}
14019 ins_encode %{
14020 __ orq($dst$$Address, $src$$Register);
14021 %}
14022 ins_pipe(ialu_mem_reg);
14023 %}
14024
14025 // Or Memory with Immediate
14026 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14027 %{
14028 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14029 effect(KILL cr);
14030 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14031
14032 ins_cost(125);
14033 format %{ "orq $dst, $src\t# long" %}
14034 ins_encode %{
14035 __ orq($dst$$Address, $src$$constant);
14036 %}
14037 ins_pipe(ialu_mem_imm);
14038 %}
14039
14040 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14041 %{
  // con must be a genuinely 64-bit power-of-2 immediate,
  // since plain OR already handles the 8/32-bit mask cases well enough.
14044 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14045
14046 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14047 effect(KILL cr);
14048
14049 ins_cost(125);
14050 format %{ "btsq $dst, log2($con)\t# long" %}
14051 ins_encode %{
14052 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14053 %}
14054 ins_pipe(ialu_mem_imm);
14055 %}
14056
14057 // Xor Instructions
14058 // Xor Register with Register
14059 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14060 %{
14061 predicate(!UseAPX);
14062 match(Set dst (XorL dst src));
14063 effect(KILL cr);
14064 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14065
14066 format %{ "xorq $dst, $src\t# long" %}
14067 ins_encode %{
14068 __ xorq($dst$$Register, $src$$Register);
14069 %}
14070 ins_pipe(ialu_reg_reg);
14071 %}
14072
14073 // Xor Register with Register using New Data Destination (NDD)
14074 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14075 %{
14076 predicate(UseAPX);
14077 match(Set dst (XorL src1 src2));
14078 effect(KILL cr);
14079 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14080
14081 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14082 ins_encode %{
14083 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14084 %}
14085 ins_pipe(ialu_reg_reg);
14086 %}
14087
14088 // Xor Register with Immediate -1
14089 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14090 %{
14091 predicate(!UseAPX);
14092 match(Set dst (XorL dst imm));
14093
14094 format %{ "notq $dst" %}
14095 ins_encode %{
14096 __ notq($dst$$Register);
14097 %}
14098 ins_pipe(ialu_reg);
14099 %}
14100
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14102 %{
14103 predicate(UseAPX);
14104 match(Set dst (XorL src imm));
14105 flag(PD::Flag_ndd_demotable_opr1);
14106
14107 format %{ "enotq $dst, $src" %}
14108 ins_encode %{
14109 __ enotq($dst$$Register, $src$$Register);
14110 %}
14111 ins_pipe(ialu_reg);
14112 %}
14113
14114 // Xor Register with Immediate
14115 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14116 %{
  // Excludes src == -1 so that xorL_rReg_im1 is selected for it regardless of cost.
14118 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14119 match(Set dst (XorL dst src));
14120 effect(KILL cr);
14121 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14122
14123 format %{ "xorq $dst, $src\t# long" %}
14124 ins_encode %{
14125 __ xorq($dst$$Register, $src$$constant);
14126 %}
14127 ins_pipe(ialu_reg);
14128 %}
14129
14130 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14131 %{
  // Excludes src2 == -1 so that xorL_rReg_im1_ndd is selected for it regardless of cost.
14133 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14134 match(Set dst (XorL src1 src2));
14135 effect(KILL cr);
14136 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14137
14138 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14139 ins_encode %{
14140 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14141 %}
14142 ins_pipe(ialu_reg);
14143 %}
14144
// Xor Memory with Immediate into Register (NDD)
14146 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14147 %{
14148 predicate(UseAPX);
14149 match(Set dst (XorL (LoadL src1) src2));
14150 effect(KILL cr);
14151 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14152 ins_cost(150);
14153
14154 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14155 ins_encode %{
14156 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14157 %}
14158 ins_pipe(ialu_reg);
14159 %}
14160
14161 // Xor Register with Memory
14162 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14163 %{
14164 predicate(!UseAPX);
14165 match(Set dst (XorL dst (LoadL src)));
14166 effect(KILL cr);
14167 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14168
14169 ins_cost(150);
14170 format %{ "xorq $dst, $src\t# long" %}
14171 ins_encode %{
14172 __ xorq($dst$$Register, $src$$Address);
14173 %}
14174 ins_pipe(ialu_reg_mem);
14175 %}
14176
14177 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14178 %{
14179 predicate(UseAPX);
14180 match(Set dst (XorL src1 (LoadL src2)));
14181 effect(KILL cr);
14182 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14183
14184 ins_cost(150);
14185 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14186 ins_encode %{
14187 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14188 %}
14189 ins_pipe(ialu_reg_mem);
14190 %}
14191
14192 // Xor Memory with Register
14193 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14194 %{
14195 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14196 effect(KILL cr);
14197 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14198
14199 ins_cost(150);
14200 format %{ "xorq $dst, $src\t# long" %}
14201 ins_encode %{
14202 __ xorq($dst$$Address, $src$$Register);
14203 %}
14204 ins_pipe(ialu_mem_reg);
14205 %}
14206
14207 // Xor Memory with Immediate
14208 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14209 %{
14210 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14211 effect(KILL cr);
14212 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14213
14214 ins_cost(125);
14215 format %{ "xorq $dst, $src\t# long" %}
14216 ins_encode %{
14217 __ xorq($dst$$Address, $src$$constant);
14218 %}
14219 ins_pipe(ialu_mem_imm);
14220 %}
14221
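// CmpLTMask materializes a branch-free comparison mask:
// dst = (p < q) ? -1 : 0. cmpLTMask0 exploits the special case p = dst,
// q = 0, where the mask is just the sign bit smeared across the register
// by an arithmetic shift.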
14222 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14223 %{
14224 match(Set dst (CmpLTMask p q));
14225 effect(KILL cr);
14226
14227 ins_cost(400);
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst\t# emits setlt + movzbl, or setzul for APX\n\t"
            "negl $dst" %}
14231 ins_encode %{
14232 __ cmpl($p$$Register, $q$$Register);
14233 __ setcc(Assembler::less, $dst$$Register);
14234 __ negl($dst$$Register);
14235 %}
14236 ins_pipe(pipe_slow);
14237 %}
14238
14239 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14240 %{
14241 match(Set dst (CmpLTMask dst zero));
14242 effect(KILL cr);
14243
14244 ins_cost(100);
14245 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14246 ins_encode %{
14247 __ sarl($dst$$Register, 31);
14248 %}
14249 ins_pipe(ialu_reg);
14250 %}
14251
14252 /* Better to save a register than avoid a branch */
14253 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14254 %{
14255 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14256 effect(KILL cr);
14257 ins_cost(300);
14258 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14259 "jge done\n\t"
14260 "addl $p,$y\n"
14261 "done: " %}
14262 ins_encode %{
14263 Register Rp = $p$$Register;
14264 Register Rq = $q$$Register;
14265 Register Ry = $y$$Register;
14266 Label done;
14267 __ subl(Rp, Rq);
14268 __ jccb(Assembler::greaterEqual, done);
14269 __ addl(Rp, Ry);
14270 __ bind(done);
14271 %}
14272 ins_pipe(pipe_cmplt);
14273 %}
14274
14275 /* Better to save a register than avoid a branch */
14276 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14277 %{
14278 match(Set y (AndI (CmpLTMask p q) y));
14279 effect(KILL cr);
14280
14281 ins_cost(300);
14282
14283 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14284 "jlt done\n\t"
14285 "xorl $y, $y\n"
14286 "done: " %}
14287 ins_encode %{
14288 Register Rp = $p$$Register;
14289 Register Rq = $q$$Register;
14290 Register Ry = $y$$Register;
14291 Label done;
14292 __ cmpl(Rp, Rq);
14293 __ jccb(Assembler::less, done);
14294 __ xorl(Ry, Ry);
14295 __ bind(done);
14296 %}
14297 ins_pipe(pipe_cmplt);
14298 %}
14299
14300
14301 //---------- FP Instructions------------------------------------------------
14302
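// ucomiss/ucomisd set ZF = PF = CF = 1 on an unordered compare (NaN).
// The expensive rFlagsRegU variants run emit_cmpfp_fixup to rewrite the
// saved flags so NaN reads as "below" (CF = 1, ZF = PF = 0); the cheap
// rFlagsRegUCF variants leave the flags raw and rely on consumers that
// only test NaN-safe conditions.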
14303 // Really expensive, avoid
14304 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14305 %{
14306 match(Set cr (CmpF src1 src2));
14307
14308 ins_cost(500);
14309 format %{ "ucomiss $src1, $src2\n\t"
14310 "jnp,s exit\n\t"
14311 "pushfq\t# saw NaN, set CF\n\t"
14312 "andq [rsp], #0xffffff2b\n\t"
14313 "popfq\n"
14314 "exit:" %}
14315 ins_encode %{
14316 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14317 emit_cmpfp_fixup(masm);
14318 %}
14319 ins_pipe(pipe_slow);
14320 %}
14321
14322 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
14323 match(Set cr (CmpF src1 src2));
14324
14325 ins_cost(100);
14326 format %{ "ucomiss $src1, $src2" %}
14327 ins_encode %{
14328 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14329 %}
14330 ins_pipe(pipe_slow);
14331 %}
14332
14333 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14334 match(Set cr (CmpF src1 (LoadF src2)));
14335
14336 ins_cost(100);
14337 format %{ "ucomiss $src1, $src2" %}
14338 ins_encode %{
14339 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14340 %}
14341 ins_pipe(pipe_slow);
14342 %}
14343
14344 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14345 match(Set cr (CmpF src con));
14346 ins_cost(100);
14347 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14348 ins_encode %{
14349 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14350 %}
14351 ins_pipe(pipe_slow);
14352 %}
14353
14354 // Really expensive, avoid
14355 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14356 %{
14357 match(Set cr (CmpD src1 src2));
14358
14359 ins_cost(500);
14360 format %{ "ucomisd $src1, $src2\n\t"
14361 "jnp,s exit\n\t"
14362 "pushfq\t# saw NaN, set CF\n\t"
14363 "andq [rsp], #0xffffff2b\n\t"
14364 "popfq\n"
14365 "exit:" %}
14366 ins_encode %{
14367 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14368 emit_cmpfp_fixup(masm);
14369 %}
14370 ins_pipe(pipe_slow);
14371 %}
14372
14373 instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
14374 match(Set cr (CmpD src1 src2));
14375
14376 ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
14378 ins_encode %{
14379 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14380 %}
14381 ins_pipe(pipe_slow);
14382 %}
14383
14384 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14385 match(Set cr (CmpD src1 (LoadD src2)));
14386
14387 ins_cost(100);
14388 format %{ "ucomisd $src1, $src2" %}
14389 ins_encode %{
14390 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14391 %}
14392 ins_pipe(pipe_slow);
14393 %}
14394
14395 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14396 match(Set cr (CmpD src con));
14397 ins_cost(100);
14398 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14399 ins_encode %{
14400 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14401 %}
14402 ins_pipe(pipe_slow);
14403 %}
14404
14405 // Compare into -1,0,1
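// CmpF3/CmpD3 produce the Java three-way result with NaN mapping to -1:
// emit_cmpfp3 seeds dst with -1 and keeps it on the unordered (jp) and
// below (jb) paths; otherwise setne+movzbl yields 0 for equal and 1 for
// greater.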
14406 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14407 %{
14408 match(Set dst (CmpF3 src1 src2));
14409 effect(KILL cr);
14410
14411 ins_cost(275);
14412 format %{ "ucomiss $src1, $src2\n\t"
14413 "movl $dst, #-1\n\t"
14414 "jp,s done\n\t"
14415 "jb,s done\n\t"
14416 "setne $dst\n\t"
14417 "movzbl $dst, $dst\n"
14418 "done:" %}
14419 ins_encode %{
14420 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14421 emit_cmpfp3(masm, $dst$$Register);
14422 %}
14423 ins_pipe(pipe_slow);
14424 %}
14425
14426 // Compare into -1,0,1
14427 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14428 %{
14429 match(Set dst (CmpF3 src1 (LoadF src2)));
14430 effect(KILL cr);
14431
14432 ins_cost(275);
14433 format %{ "ucomiss $src1, $src2\n\t"
14434 "movl $dst, #-1\n\t"
14435 "jp,s done\n\t"
14436 "jb,s done\n\t"
14437 "setne $dst\n\t"
14438 "movzbl $dst, $dst\n"
14439 "done:" %}
14440 ins_encode %{
14441 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14442 emit_cmpfp3(masm, $dst$$Register);
14443 %}
14444 ins_pipe(pipe_slow);
14445 %}
14446
14447 // Compare into -1,0,1
14448 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14449 match(Set dst (CmpF3 src con));
14450 effect(KILL cr);
14451
14452 ins_cost(275);
14453 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14454 "movl $dst, #-1\n\t"
14455 "jp,s done\n\t"
14456 "jb,s done\n\t"
14457 "setne $dst\n\t"
14458 "movzbl $dst, $dst\n"
14459 "done:" %}
14460 ins_encode %{
14461 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14462 emit_cmpfp3(masm, $dst$$Register);
14463 %}
14464 ins_pipe(pipe_slow);
14465 %}
14466
14467 // Compare into -1,0,1
14468 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14469 %{
14470 match(Set dst (CmpD3 src1 src2));
14471 effect(KILL cr);
14472
14473 ins_cost(275);
14474 format %{ "ucomisd $src1, $src2\n\t"
14475 "movl $dst, #-1\n\t"
14476 "jp,s done\n\t"
14477 "jb,s done\n\t"
14478 "setne $dst\n\t"
14479 "movzbl $dst, $dst\n"
14480 "done:" %}
14481 ins_encode %{
14482 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14483 emit_cmpfp3(masm, $dst$$Register);
14484 %}
14485 ins_pipe(pipe_slow);
14486 %}
14487
14488 // Compare into -1,0,1
14489 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14490 %{
14491 match(Set dst (CmpD3 src1 (LoadD src2)));
14492 effect(KILL cr);
14493
14494 ins_cost(275);
14495 format %{ "ucomisd $src1, $src2\n\t"
14496 "movl $dst, #-1\n\t"
14497 "jp,s done\n\t"
14498 "jb,s done\n\t"
14499 "setne $dst\n\t"
14500 "movzbl $dst, $dst\n"
14501 "done:" %}
14502 ins_encode %{
14503 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14504 emit_cmpfp3(masm, $dst$$Register);
14505 %}
14506 ins_pipe(pipe_slow);
14507 %}
14508
14509 // Compare into -1,0,1
14510 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14511 match(Set dst (CmpD3 src con));
14512 effect(KILL cr);
14513
14514 ins_cost(275);
14515 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14516 "movl $dst, #-1\n\t"
14517 "jp,s done\n\t"
14518 "jb,s done\n\t"
14519 "setne $dst\n\t"
14520 "movzbl $dst, $dst\n"
14521 "done:" %}
14522 ins_encode %{
14523 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14524 emit_cmpfp3(masm, $dst$$Register);
14525 %}
14526 ins_pipe(pipe_slow);
14527 %}
14528
14529 //----------Arithmetic Conversion Instructions---------------------------------
14530
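// Java f2i/f2l/d2i/d2l require NaN -> 0 and saturation at the target
// type's min/max, while plain cvttss2si/cvttsd2si return the "integer
// indefinite" value (e.g. 0x80000000) in those cases. convertF2I therefore
// adds a fix-up slow path (and clobbers flags), whereas the AVX10.2
// saturating converts match the Java semantics directly, which is why
// those variants need no KILL cr.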
14531 instruct convF2D_reg_reg(regD dst, regF src)
14532 %{
14533 match(Set dst (ConvF2D src));
14534
14535 format %{ "cvtss2sd $dst, $src" %}
14536 ins_encode %{
14537 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14538 %}
14539 ins_pipe(pipe_slow); // XXX
14540 %}
14541
14542 instruct convF2D_reg_mem(regD dst, memory src)
14543 %{
14544 predicate(UseAVX == 0);
14545 match(Set dst (ConvF2D (LoadF src)));
14546
14547 format %{ "cvtss2sd $dst, $src" %}
14548 ins_encode %{
14549 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14550 %}
14551 ins_pipe(pipe_slow); // XXX
14552 %}
14553
14554 instruct convD2F_reg_reg(regF dst, regD src)
14555 %{
14556 match(Set dst (ConvD2F src));
14557
14558 format %{ "cvtsd2ss $dst, $src" %}
14559 ins_encode %{
14560 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14561 %}
14562 ins_pipe(pipe_slow); // XXX
14563 %}
14564
14565 instruct convD2F_reg_mem(regF dst, memory src)
14566 %{
14567 predicate(UseAVX == 0);
14568 match(Set dst (ConvD2F (LoadD src)));
14569
14570 format %{ "cvtsd2ss $dst, $src" %}
14571 ins_encode %{
14572 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14573 %}
14574 ins_pipe(pipe_slow); // XXX
14575 %}
14576
14577 // XXX do mem variants
14578 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14579 %{
14580 predicate(!VM_Version::supports_avx10_2());
14581 match(Set dst (ConvF2I src));
14582 effect(KILL cr);
14583 format %{ "convert_f2i $dst, $src" %}
14584 ins_encode %{
14585 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14586 %}
14587 ins_pipe(pipe_slow);
14588 %}
14589
14590 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14591 %{
14592 predicate(VM_Version::supports_avx10_2());
14593 match(Set dst (ConvF2I src));
14594 format %{ "evcvttss2sisl $dst, $src" %}
14595 ins_encode %{
14596 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14597 %}
14598 ins_pipe(pipe_slow);
14599 %}
14600
14601 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14602 %{
14603 predicate(VM_Version::supports_avx10_2());
14604 match(Set dst (ConvF2I (LoadF src)));
14605 format %{ "evcvttss2sisl $dst, $src" %}
14606 ins_encode %{
14607 __ evcvttss2sisl($dst$$Register, $src$$Address);
14608 %}
14609 ins_pipe(pipe_slow);
14610 %}
14611
14612 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14613 %{
14614 predicate(!VM_Version::supports_avx10_2());
14615 match(Set dst (ConvF2L src));
14616 effect(KILL cr);
  format %{ "convert_f2l $dst, $src" %}
14618 ins_encode %{
14619 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14620 %}
14621 ins_pipe(pipe_slow);
14622 %}
14623
14624 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14625 %{
14626 predicate(VM_Version::supports_avx10_2());
14627 match(Set dst (ConvF2L src));
14628 format %{ "evcvttss2sisq $dst, $src" %}
14629 ins_encode %{
14630 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14631 %}
14632 ins_pipe(pipe_slow);
14633 %}
14634
14635 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14636 %{
14637 predicate(VM_Version::supports_avx10_2());
14638 match(Set dst (ConvF2L (LoadF src)));
14639 format %{ "evcvttss2sisq $dst, $src" %}
14640 ins_encode %{
14641 __ evcvttss2sisq($dst$$Register, $src$$Address);
14642 %}
14643 ins_pipe(pipe_slow);
14644 %}
14645
14646 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14647 %{
14648 predicate(!VM_Version::supports_avx10_2());
14649 match(Set dst (ConvD2I src));
14650 effect(KILL cr);
  format %{ "convert_d2i $dst, $src" %}
14652 ins_encode %{
14653 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14654 %}
14655 ins_pipe(pipe_slow);
14656 %}
14657
14658 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14659 %{
14660 predicate(VM_Version::supports_avx10_2());
14661 match(Set dst (ConvD2I src));
14662 format %{ "evcvttsd2sisl $dst, $src" %}
14663 ins_encode %{
14664 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14665 %}
14666 ins_pipe(pipe_slow);
14667 %}
14668
14669 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14670 %{
14671 predicate(VM_Version::supports_avx10_2());
14672 match(Set dst (ConvD2I (LoadD src)));
14673 format %{ "evcvttsd2sisl $dst, $src" %}
14674 ins_encode %{
14675 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14676 %}
14677 ins_pipe(pipe_slow);
14678 %}
14679
14680 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14681 %{
14682 predicate(!VM_Version::supports_avx10_2());
14683 match(Set dst (ConvD2L src));
14684 effect(KILL cr);
  format %{ "convert_d2l $dst, $src" %}
14686 ins_encode %{
14687 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14688 %}
14689 ins_pipe(pipe_slow);
14690 %}
14691
14692 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14693 %{
14694 predicate(VM_Version::supports_avx10_2());
14695 match(Set dst (ConvD2L src));
14696 format %{ "evcvttsd2sisq $dst, $src" %}
14697 ins_encode %{
14698 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14699 %}
14700 ins_pipe(pipe_slow);
14701 %}
14702
14703 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14704 %{
14705 predicate(VM_Version::supports_avx10_2());
14706 match(Set dst (ConvD2L (LoadD src)));
14707 format %{ "evcvttsd2sisq $dst, $src" %}
14708 ins_encode %{
14709 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14710 %}
14711 ins_pipe(pipe_slow);
14712 %}
14713
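// RoundF/RoundD implement the Math.round intrinsics (round half up to the
// nearest int/long, with NaN mapping to 0); the rcx and rtmp temps serve
// the out-of-range fix-up path.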
14714 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14715 %{
14716 match(Set dst (RoundD src));
14717 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src\t# using $rtmp and $rcx as TEMP" %}
14719 ins_encode %{
14720 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14721 %}
14722 ins_pipe(pipe_slow);
14723 %}
14724
14725 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14726 %{
14727 match(Set dst (RoundF src));
14728 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14729 format %{ "round_float $dst,$src" %}
14730 ins_encode %{
14731 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14732 %}
14733 ins_pipe(pipe_slow);
14734 %}
14735
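// cvtsi2ss/cvtsi2sd merge into the destination's upper lanes, creating a
// false dependence on the register's previous contents; on AVX targets
// the encoders below first clear the register with the pxor zeroing
// idiom to break it.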
14736 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14737 %{
14738 predicate(!UseXmmI2F);
14739 match(Set dst (ConvI2F src));
14740
14741 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14742 ins_encode %{
14743 if (UseAVX > 0) {
14744 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14745 }
14746 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14747 %}
14748 ins_pipe(pipe_slow); // XXX
14749 %}
14750
14751 instruct convI2F_reg_mem(regF dst, memory src)
14752 %{
14753 predicate(UseAVX == 0);
14754 match(Set dst (ConvI2F (LoadI src)));
14755
14756 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14757 ins_encode %{
14758 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14759 %}
14760 ins_pipe(pipe_slow); // XXX
14761 %}
14762
14763 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14764 %{
14765 predicate(!UseXmmI2D);
14766 match(Set dst (ConvI2D src));
14767
14768 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14769 ins_encode %{
14770 if (UseAVX > 0) {
14771 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14772 }
14773 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14774 %}
14775 ins_pipe(pipe_slow); // XXX
14776 %}
14777
14778 instruct convI2D_reg_mem(regD dst, memory src)
14779 %{
14780 predicate(UseAVX == 0);
14781 match(Set dst (ConvI2D (LoadI src)));
14782
14783 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14784 ins_encode %{
14785 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14786 %}
14787 ins_pipe(pipe_slow); // XXX
14788 %}
14789
14790 instruct convXI2F_reg(regF dst, rRegI src)
14791 %{
14792 predicate(UseXmmI2F);
14793 match(Set dst (ConvI2F src));
14794
14795 format %{ "movdl $dst, $src\n\t"
14796 "cvtdq2psl $dst, $dst\t# i2f" %}
14797 ins_encode %{
14798 __ movdl($dst$$XMMRegister, $src$$Register);
14799 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14800 %}
14801 ins_pipe(pipe_slow); // XXX
14802 %}
14803
14804 instruct convXI2D_reg(regD dst, rRegI src)
14805 %{
14806 predicate(UseXmmI2D);
14807 match(Set dst (ConvI2D src));
14808
14809 format %{ "movdl $dst, $src\n\t"
14810 "cvtdq2pdl $dst, $dst\t# i2d" %}
14811 ins_encode %{
14812 __ movdl($dst$$XMMRegister, $src$$Register);
14813 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14814 %}
14815 ins_pipe(pipe_slow); // XXX
14816 %}
14817
14818 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14819 %{
14820 match(Set dst (ConvL2F src));
14821
14822 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14823 ins_encode %{
14824 if (UseAVX > 0) {
14825 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14826 }
14827 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14828 %}
14829 ins_pipe(pipe_slow); // XXX
14830 %}
14831
14832 instruct convL2F_reg_mem(regF dst, memory src)
14833 %{
14834 predicate(UseAVX == 0);
14835 match(Set dst (ConvL2F (LoadL src)));
14836
14837 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14838 ins_encode %{
14839 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14840 %}
14841 ins_pipe(pipe_slow); // XXX
14842 %}
14843
14844 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14845 %{
14846 match(Set dst (ConvL2D src));
14847
14848 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14849 ins_encode %{
14850 if (UseAVX > 0) {
14851 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14852 }
14853 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14854 %}
14855 ins_pipe(pipe_slow); // XXX
14856 %}
14857
14858 instruct convL2D_reg_mem(regD dst, memory src)
14859 %{
14860 predicate(UseAVX == 0);
14861 match(Set dst (ConvL2D (LoadL src)));
14862
14863 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14864 ins_encode %{
14865 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14866 %}
14867 ins_pipe(pipe_slow); // XXX
14868 %}
14869
14870 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14871 %{
14872 match(Set dst (ConvI2L src));
14873
14874 ins_cost(125);
14875 format %{ "movslq $dst, $src\t# i2l" %}
14876 ins_encode %{
14877 __ movslq($dst$$Register, $src$$Register);
14878 %}
14879 ins_pipe(ialu_reg_reg);
14880 %}
14881
14882 // Zero-extend convert int to long
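// A 32-bit register write implicitly zeroes the upper 32 bits on x86-64,
// so (AndL (ConvI2L x) 0xFFFFFFFF) needs only a movl -- or no code at all
// when dst and src already share a register.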
14883 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14884 %{
14885 match(Set dst (AndL (ConvI2L src) mask));
14886
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
14888 ins_encode %{
14889 if ($dst$$reg != $src$$reg) {
14890 __ movl($dst$$Register, $src$$Register);
14891 }
14892 %}
14893 ins_pipe(ialu_reg_reg);
14894 %}
14895
14896 // Zero-extend convert int to long
14897 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
14898 %{
14899 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
14900
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
14902 ins_encode %{
14903 __ movl($dst$$Register, $src$$Address);
14904 %}
14905 ins_pipe(ialu_reg_mem);
14906 %}
14907
14908 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
14909 %{
14910 match(Set dst (AndL src mask));
14911
14912 format %{ "movl $dst, $src\t# zero-extend long" %}
14913 ins_encode %{
14914 __ movl($dst$$Register, $src$$Register);
14915 %}
14916 ins_pipe(ialu_reg_reg);
14917 %}
14918
14919 instruct convL2I_reg_reg(rRegI dst, rRegL src)
14920 %{
14921 match(Set dst (ConvL2I src));
14922
14923 format %{ "movl $dst, $src\t# l2i" %}
14924 ins_encode %{
14925 __ movl($dst$$Register, $src$$Register);
14926 %}
14927 ins_pipe(ialu_reg_reg);
14928 %}
14929
14930
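// The Move*2* nodes reinterpret bits between the integer and FP register
// files (Float.floatToRawIntBits and friends): no conversion happens, so
// the stack variants are plain 32/64-bit loads and stores and the
// register variants are movd/movq transfers.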
14931 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
14932 match(Set dst (MoveF2I src));
14933 effect(DEF dst, USE src);
14934
14935 ins_cost(125);
14936 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
14937 ins_encode %{
14938 __ movl($dst$$Register, Address(rsp, $src$$disp));
14939 %}
14940 ins_pipe(ialu_reg_mem);
14941 %}
14942
14943 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
14944 match(Set dst (MoveI2F src));
14945 effect(DEF dst, USE src);
14946
14947 ins_cost(125);
14948 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
14949 ins_encode %{
14950 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
14951 %}
14952 ins_pipe(pipe_slow);
14953 %}
14954
14955 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
14956 match(Set dst (MoveD2L src));
14957 effect(DEF dst, USE src);
14958
14959 ins_cost(125);
14960 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
14961 ins_encode %{
14962 __ movq($dst$$Register, Address(rsp, $src$$disp));
14963 %}
14964 ins_pipe(ialu_reg_mem);
14965 %}
14966
14967 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
14968 predicate(!UseXmmLoadAndClearUpper);
14969 match(Set dst (MoveL2D src));
14970 effect(DEF dst, USE src);
14971
14972 ins_cost(125);
14973 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
14974 ins_encode %{
14975 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14976 %}
14977 ins_pipe(pipe_slow);
14978 %}
14979
14980 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
14981 predicate(UseXmmLoadAndClearUpper);
14982 match(Set dst (MoveL2D src));
14983 effect(DEF dst, USE src);
14984
14985 ins_cost(125);
14986 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
14987 ins_encode %{
14988 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
14989 %}
14990 ins_pipe(pipe_slow);
14991 %}
14992
14993
14994 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
14995 match(Set dst (MoveF2I src));
14996 effect(DEF dst, USE src);
14997
14998 ins_cost(95); // XXX
14999 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15000 ins_encode %{
15001 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15002 %}
15003 ins_pipe(pipe_slow);
15004 %}
15005
15006 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15007 match(Set dst (MoveI2F src));
15008 effect(DEF dst, USE src);
15009
15010 ins_cost(100);
15011 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15012 ins_encode %{
15013 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15014 %}
15015 ins_pipe( ialu_mem_reg );
15016 %}
15017
15018 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15019 match(Set dst (MoveD2L src));
15020 effect(DEF dst, USE src);
15021
15022 ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15024 ins_encode %{
15025 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15026 %}
15027 ins_pipe(pipe_slow);
15028 %}
15029
15030 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15031 match(Set dst (MoveL2D src));
15032 effect(DEF dst, USE src);
15033
15034 ins_cost(100);
15035 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15036 ins_encode %{
15037 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15038 %}
15039 ins_pipe(ialu_mem_reg);
15040 %}
15041
15042 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15043 match(Set dst (MoveF2I src));
15044 effect(DEF dst, USE src);
15045 ins_cost(85);
15046 format %{ "movd $dst,$src\t# MoveF2I" %}
15047 ins_encode %{
15048 __ movdl($dst$$Register, $src$$XMMRegister);
15049 %}
15050 ins_pipe( pipe_slow );
15051 %}
15052
15053 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15054 match(Set dst (MoveD2L src));
15055 effect(DEF dst, USE src);
15056 ins_cost(85);
15057 format %{ "movd $dst,$src\t# MoveD2L" %}
15058 ins_encode %{
15059 __ movdq($dst$$Register, $src$$XMMRegister);
15060 %}
15061 ins_pipe( pipe_slow );
15062 %}
15063
15064 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15065 match(Set dst (MoveI2F src));
15066 effect(DEF dst, USE src);
15067 ins_cost(100);
15068 format %{ "movd $dst,$src\t# MoveI2F" %}
15069 ins_encode %{
15070 __ movdl($dst$$XMMRegister, $src$$Register);
15071 %}
15072 ins_pipe( pipe_slow );
15073 %}
15074
15075 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15076 match(Set dst (MoveL2D src));
15077 effect(DEF dst, USE src);
15078 ins_cost(100);
15079 format %{ "movd $dst,$src\t# MoveL2D" %}
15080 ins_encode %{
15081 __ movdq($dst$$XMMRegister, $src$$Register);
15082 %}
15083 ins_pipe( pipe_slow );
15084 %}
15085
15086 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
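// The count is in 8-byte heap words (hence the shlq by 3 below). clear_mem
// selects a strategy at code-emission time: rep stosb on targets with fast
// string ops (UseFastStosb), a 64-byte YMM loop under UseXMMForObjInit,
// otherwise rep stosq; small lengths are zeroed by an inline word loop.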
15088 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15089 Universe dummy, rFlagsReg cr)
15090 %{
15091 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15092 match(Set dummy (ClearArray cnt base));
15093 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15094
15095 format %{ $$template
15096 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15097 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15098 $$emit$$"jg LARGE\n\t"
15099 $$emit$$"dec rcx\n\t"
15100 $$emit$$"js DONE\t# Zero length\n\t"
15101 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15102 $$emit$$"dec rcx\n\t"
15103 $$emit$$"jge LOOP\n\t"
15104 $$emit$$"jmp DONE\n\t"
15105 $$emit$$"# LARGE:\n\t"
15106 if (UseFastStosb) {
15107 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15108 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15109 } else if (UseXMMForObjInit) {
15110 $$emit$$"mov rdi,rax\n\t"
15111 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15112 $$emit$$"jmpq L_zero_64_bytes\n\t"
15113 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15114 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15115 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15116 $$emit$$"add 0x40,rax\n\t"
15117 $$emit$$"# L_zero_64_bytes:\n\t"
15118 $$emit$$"sub 0x8,rcx\n\t"
15119 $$emit$$"jge L_loop\n\t"
15120 $$emit$$"add 0x4,rcx\n\t"
15121 $$emit$$"jl L_tail\n\t"
15122 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15123 $$emit$$"add 0x20,rax\n\t"
15124 $$emit$$"sub 0x4,rcx\n\t"
15125 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15126 $$emit$$"add 0x4,rcx\n\t"
15127 $$emit$$"jle L_end\n\t"
15128 $$emit$$"dec rcx\n\t"
15129 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15130 $$emit$$"vmovq xmm0,(rax)\n\t"
15131 $$emit$$"add 0x8,rax\n\t"
15132 $$emit$$"dec rcx\n\t"
15133 $$emit$$"jge L_sloop\n\t"
15134 $$emit$$"# L_end:\n\t"
15135 } else {
15136 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15137 }
15138 $$emit$$"# DONE"
15139 %}
15140 ins_encode %{
15141 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15142 $tmp$$XMMRegister, false, knoreg);
15143 %}
15144 ins_pipe(pipe_slow);
15145 %}
15146
15147 // Small non-constant length ClearArray for AVX512 targets.
15148 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15149 Universe dummy, rFlagsReg cr)
15150 %{
15151 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15152 match(Set dummy (ClearArray cnt base));
15153 ins_cost(125);
15154 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15155
15156 format %{ $$template
15157 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15158 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15159 $$emit$$"jg LARGE\n\t"
15160 $$emit$$"dec rcx\n\t"
15161 $$emit$$"js DONE\t# Zero length\n\t"
15162 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15163 $$emit$$"dec rcx\n\t"
15164 $$emit$$"jge LOOP\n\t"
15165 $$emit$$"jmp DONE\n\t"
15166 $$emit$$"# LARGE:\n\t"
15167 if (UseFastStosb) {
15168 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15169 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15170 } else if (UseXMMForObjInit) {
15171 $$emit$$"mov rdi,rax\n\t"
15172 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15173 $$emit$$"jmpq L_zero_64_bytes\n\t"
15174 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15175 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15176 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15177 $$emit$$"add 0x40,rax\n\t"
15178 $$emit$$"# L_zero_64_bytes:\n\t"
15179 $$emit$$"sub 0x8,rcx\n\t"
15180 $$emit$$"jge L_loop\n\t"
15181 $$emit$$"add 0x4,rcx\n\t"
15182 $$emit$$"jl L_tail\n\t"
15183 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15184 $$emit$$"add 0x20,rax\n\t"
15185 $$emit$$"sub 0x4,rcx\n\t"
15186 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15187 $$emit$$"add 0x4,rcx\n\t"
15188 $$emit$$"jle L_end\n\t"
15189 $$emit$$"dec rcx\n\t"
15190 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15191 $$emit$$"vmovq xmm0,(rax)\n\t"
15192 $$emit$$"add 0x8,rax\n\t"
15193 $$emit$$"dec rcx\n\t"
15194 $$emit$$"jge L_sloop\n\t"
15195 $$emit$$"# L_end:\n\t"
15196 } else {
15197 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15198 }
15199 $$emit$$"# DONE"
15200 %}
15201 ins_encode %{
15202 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15203 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15204 %}
15205 ins_pipe(pipe_slow);
15206 %}
15207
15208 // Large non-constant length ClearArray for non-AVX512 targets.
15209 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15210 Universe dummy, rFlagsReg cr)
15211 %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15213 match(Set dummy (ClearArray cnt base));
15214 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15215
15216 format %{ $$template
15217 if (UseFastStosb) {
15218 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15219 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15220 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15221 } else if (UseXMMForObjInit) {
15222 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15223 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15224 $$emit$$"jmpq L_zero_64_bytes\n\t"
15225 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15226 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15227 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15228 $$emit$$"add 0x40,rax\n\t"
15229 $$emit$$"# L_zero_64_bytes:\n\t"
15230 $$emit$$"sub 0x8,rcx\n\t"
15231 $$emit$$"jge L_loop\n\t"
15232 $$emit$$"add 0x4,rcx\n\t"
15233 $$emit$$"jl L_tail\n\t"
15234 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15235 $$emit$$"add 0x20,rax\n\t"
15236 $$emit$$"sub 0x4,rcx\n\t"
15237 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15238 $$emit$$"add 0x4,rcx\n\t"
15239 $$emit$$"jle L_end\n\t"
15240 $$emit$$"dec rcx\n\t"
15241 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15242 $$emit$$"vmovq xmm0,(rax)\n\t"
15243 $$emit$$"add 0x8,rax\n\t"
15244 $$emit$$"dec rcx\n\t"
15245 $$emit$$"jge L_sloop\n\t"
15246 $$emit$$"# L_end:\n\t"
15247 } else {
15248 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15249 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15250 }
15251 %}
15252 ins_encode %{
15253 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15254 $tmp$$XMMRegister, true, knoreg);
15255 %}
15256 ins_pipe(pipe_slow);
15257 %}
15258
15259 // Large non-constant length ClearArray for AVX512 targets.
15260 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15261 Universe dummy, rFlagsReg cr)
15262 %{
15263 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15264 match(Set dummy (ClearArray cnt base));
15265 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15266
15267 format %{ $$template
15268 if (UseFastStosb) {
15269 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15270 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15271 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15272 } else if (UseXMMForObjInit) {
15273 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15274 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15275 $$emit$$"jmpq L_zero_64_bytes\n\t"
15276 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15277 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15278 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15279 $$emit$$"add 0x40,rax\n\t"
15280 $$emit$$"# L_zero_64_bytes:\n\t"
15281 $$emit$$"sub 0x8,rcx\n\t"
15282 $$emit$$"jge L_loop\n\t"
15283 $$emit$$"add 0x4,rcx\n\t"
15284 $$emit$$"jl L_tail\n\t"
15285 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15286 $$emit$$"add 0x20,rax\n\t"
15287 $$emit$$"sub 0x4,rcx\n\t"
15288 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15289 $$emit$$"add 0x4,rcx\n\t"
15290 $$emit$$"jle L_end\n\t"
15291 $$emit$$"dec rcx\n\t"
15292 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15293 $$emit$$"vmovq xmm0,(rax)\n\t"
15294 $$emit$$"add 0x8,rax\n\t"
15295 $$emit$$"dec rcx\n\t"
15296 $$emit$$"jge L_sloop\n\t"
15297 $$emit$$"# L_end:\n\t"
15298 } else {
15299 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15300 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15301 }
15302 %}
15303 ins_encode %{
15304 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15305 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15306 %}
15307 ins_pipe(pipe_slow);
15308 %}
15309
15310 // Small constant length ClearArray for AVX512 targets.
15311 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15312 %{
15313 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15314 match(Set dummy (ClearArray cnt base));
15315 ins_cost(100);
15316 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base, $cnt" %}
15318 ins_encode %{
15319 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15320 %}
15321 ins_pipe(pipe_slow);
15322 %}
15323
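// String compare: the LL/UU/LU/UL suffixes name the operand encodings
// (L = Latin-1, one byte per element; U = UTF-16, two bytes). The UL
// variants swap the two strings and reuse the LU comparator, which is why
// their encode blocks pass str2/cnt2 first; the _evex variants carry an
// opmask temp (ktmp) for masked tail handling.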
15324 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15325 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15326 %{
15327 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15328 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15329 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15330
15331 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15332 ins_encode %{
15333 __ string_compare($str1$$Register, $str2$$Register,
15334 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15335 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15336 %}
15337 ins_pipe( pipe_slow );
15338 %}
15339
15340 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15341 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15342 %{
15343 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15344 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15345 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15346
15347 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15348 ins_encode %{
15349 __ string_compare($str1$$Register, $str2$$Register,
15350 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15351 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15352 %}
15353 ins_pipe( pipe_slow );
15354 %}
15355
15356 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15357 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15358 %{
15359 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15360 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15361 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15362
15363 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15364 ins_encode %{
15365 __ string_compare($str1$$Register, $str2$$Register,
15366 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15367 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15368 %}
15369 ins_pipe( pipe_slow );
15370 %}
15371
15372 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15373 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15374 %{
15375 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15376 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15377 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15378
15379 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15380 ins_encode %{
15381 __ string_compare($str1$$Register, $str2$$Register,
15382 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15383 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15384 %}
15385 ins_pipe( pipe_slow );
15386 %}
15387
15388 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15389 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15390 %{
15391 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15392 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15393 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15394
15395 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15396 ins_encode %{
15397 __ string_compare($str1$$Register, $str2$$Register,
15398 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15399 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15400 %}
15401 ins_pipe( pipe_slow );
15402 %}
15403
15404 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15405 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15406 %{
15407 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15408 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15409 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15410
15411 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15412 ins_encode %{
15413 __ string_compare($str1$$Register, $str2$$Register,
15414 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15415 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15416 %}
15417 ins_pipe( pipe_slow );
15418 %}
15419
15420 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15421 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15422 %{
15423 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15424 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15425 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15426
15427 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15428 ins_encode %{
15429 __ string_compare($str2$$Register, $str1$$Register,
15430 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15431 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15432 %}
15433 ins_pipe( pipe_slow );
15434 %}
15435
15436 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15437 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15438 %{
15439 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15440 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15441 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15442
15443 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15444 ins_encode %{
15445 __ string_compare($str2$$Register, $str1$$Register,
15446 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15447 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15448 %}
15449 ins_pipe( pipe_slow );
15450 %}
15451
// Fast search of a substring with known size.
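// int_cnt2 is the compile-time substring length. Needles of at least one
// pcmpestri window (16 Latin-1 bytes or 8 UTF-16 chars) stay entirely in
// registers via string_indexofC8; shorter ones go through string_indexof,
// which may stage them on the stack near a page boundary.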
15453 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15454 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15455 %{
15456 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15457 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15458 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15459
15460 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15461 ins_encode %{
15462 int icnt2 = (int)$int_cnt2$$constant;
15463 if (icnt2 >= 16) {
    // IndexOf for constant substrings with a size of at least 16 elements,
    // which don't need to be loaded through the stack.
15466 __ string_indexofC8($str1$$Register, $str2$$Register,
15467 $cnt1$$Register, $cnt2$$Register,
15468 icnt2, $result$$Register,
15469 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15470 } else {
    // Small strings are loaded through the stack if they cross a page boundary.
15472 __ string_indexof($str1$$Register, $str2$$Register,
15473 $cnt1$$Register, $cnt2$$Register,
15474 icnt2, $result$$Register,
15475 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15476 }
15477 %}
15478 ins_pipe( pipe_slow );
15479 %}
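
// The >= 16 cutover above (and the >= 8 cutover in the UU/UL variants below)
// corresponds to one 16-byte XMM load: 16 Latin-1 bytes or 8 UTF-16 chars
// both fill a single 16-byte vector register.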
15480
15481 // fast search of substring with known size.
15482 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15483 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15484 %{
15485 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15486 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15487 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15488
15489 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15490 ins_encode %{
15491 int icnt2 = (int)$int_cnt2$$constant;
15492 if (icnt2 >= 8) {
    // IndexOf for constant substrings with a size of at least 8 elements,
    // which don't need to be loaded through the stack.
15495 __ string_indexofC8($str1$$Register, $str2$$Register,
15496 $cnt1$$Register, $cnt2$$Register,
15497 icnt2, $result$$Register,
15498 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15499 } else {
    // Small strings are loaded through the stack if they cross a page boundary.
15501 __ string_indexof($str1$$Register, $str2$$Register,
15502 $cnt1$$Register, $cnt2$$Register,
15503 icnt2, $result$$Register,
15504 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15505 }
15506 %}
15507 ins_pipe( pipe_slow );
15508 %}
15509
15510 // fast search of substring with known size.
15511 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15512 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15513 %{
15514 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15515 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15516 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15517
15518 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15519 ins_encode %{
15520 int icnt2 = (int)$int_cnt2$$constant;
15521 if (icnt2 >= 8) {
    // IndexOf for constant substrings with a size of at least 8 elements,
    // which don't need to be loaded through the stack.
15524 __ string_indexofC8($str1$$Register, $str2$$Register,
15525 $cnt1$$Register, $cnt2$$Register,
15526 icnt2, $result$$Register,
15527 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15528 } else {
    // Small strings are loaded through the stack if they cross a page boundary.
15530 __ string_indexof($str1$$Register, $str2$$Register,
15531 $cnt1$$Register, $cnt2$$Register,
15532 icnt2, $result$$Register,
15533 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15534 }
15535 %}
15536 ins_pipe( pipe_slow );
15537 %}
15538
15539 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15540 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15541 %{
15542 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15543 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15544 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15545
15546 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15547 ins_encode %{
15548 __ string_indexof($str1$$Register, $str2$$Register,
15549 $cnt1$$Register, $cnt2$$Register,
15550 (-1), $result$$Register,
15551 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15552 %}
15553 ins_pipe( pipe_slow );
15554 %}
15555
15556 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15557 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15558 %{
15559 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15560 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15561 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15562
15563 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15564 ins_encode %{
15565 __ string_indexof($str1$$Register, $str2$$Register,
15566 $cnt1$$Register, $cnt2$$Register,
15567 (-1), $result$$Register,
15568 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15569 %}
15570 ins_pipe( pipe_slow );
15571 %}
15572
15573 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15574 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15575 %{
15576 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15577 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15578 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15579
15580 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15581 ins_encode %{
15582 __ string_indexof($str1$$Register, $str2$$Register,
15583 $cnt1$$Register, $cnt2$$Register,
15584 (-1), $result$$Register,
15585 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15586 %}
15587 ins_pipe( pipe_slow );
15588 %}
15589
15590 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15591 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15592 %{
15593 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15594 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15595 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15596 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15597 ins_encode %{
15598 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15599 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15600 %}
15601 ins_pipe( pipe_slow );
15602 %}
15603
15604 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15605 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15606 %{
15607 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15608 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15609 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15610 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15611 ins_encode %{
15612 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15613 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15614 %}
15615 ins_pipe( pipe_slow );
15616 %}
15617
15618 // fast string equals
15619 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15620 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15621 %{
15622 predicate(!VM_Version::supports_avx512vlbw());
15623 match(Set result (StrEquals (Binary str1 str2) cnt));
15624 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15625
15626 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15627 ins_encode %{
15628 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15629 $cnt$$Register, $result$$Register, $tmp3$$Register,
15630 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15631 %}
15632 ins_pipe( pipe_slow );
15633 %}
15634
15635 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15636 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15637 %{
15638 predicate(VM_Version::supports_avx512vlbw());
15639 match(Set result (StrEquals (Binary str1 str2) cnt));
15640 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15641
15642 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15643 ins_encode %{
15644 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15645 $cnt$$Register, $result$$Register, $tmp3$$Register,
15646 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15647 %}
15648 ins_pipe( pipe_slow );
15649 %}
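
// Note: StrEquals reuses the arrays_equals() masm kernel. The leading boolean
// (false here, true in the Array Equals rules below) appears to select whether
// the element count is read from the array headers or, as for strings, passed
// explicitly in $cnt; the later "char" boolean selects byte vs. char elements.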
15650
15651 // fast array equals
15652 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15653 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15654 %{
15655 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15656 match(Set result (AryEq ary1 ary2));
15657 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15658
15659 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15660 ins_encode %{
15661 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15662 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15663 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15664 %}
15665 ins_pipe( pipe_slow );
15666 %}
15667
15668 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15669 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15670 %{
15671 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15672 match(Set result (AryEq ary1 ary2));
15673 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15674
15675 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15676 ins_encode %{
15677 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15678 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15679 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15680 %}
15681 ins_pipe( pipe_slow );
15682 %}
15683
15684 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15685 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15686 %{
15687 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15688 match(Set result (AryEq ary1 ary2));
15689 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15690
15691 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15692 ins_encode %{
15693 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15694 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15695 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15696 %}
15697 ins_pipe( pipe_slow );
15698 %}
15699
15700 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15701 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15702 %{
15703 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15704 match(Set result (AryEq ary1 ary2));
15705 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15706
15707 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15708 ins_encode %{
15709 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15710 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15711 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15712 %}
15713 ins_pipe( pipe_slow );
15714 %}
15715
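// Vectorized array hash code. This computes the standard Java polynomial
// hash, h = 31*h + a[i] (with $result carrying the incoming h), as used by
// e.g. Arrays.hashCode and String.hashCode. A scalar sketch of the contract,
// assuming byte elements:
//   int h = result;
//   for (int i = 0; i < cnt1; i++) { h = 31 * h + ary1[i]; }
//   result = h;
// The large bank of vector TEMPs presumably holds partial sums and the
// precomputed powers of 31 used to process several elements per iteration.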
15716 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15717 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15718 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15719 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15720 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15721 %{
15722 predicate(UseAVX >= 2);
15723 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15724 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15725 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15726 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15727 USE basic_type, KILL cr);
15728
15729 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15730 ins_encode %{
15731 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15732 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15733 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15734 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15735 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15736 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15737 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15738 %}
15739 ins_pipe( pipe_slow );
15740 %}
15741
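// Count the initial run of non-negative bytes. As a sketch of the intended
// contract (an assumption from the node name, not spelled out here):
//   result = index of the first byte with the sign bit set, or $len if none.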
15742 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15744 %{
15745 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15746 match(Set result (CountPositives ary1 len));
15747 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15748
15749 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15750 ins_encode %{
15751 __ count_positives($ary1$$Register, $len$$Register,
15752 $result$$Register, $tmp3$$Register,
15753 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15754 %}
15755 ins_pipe( pipe_slow );
15756 %}
15757
15758 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15760 %{
15761 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15762 match(Set result (CountPositives ary1 len));
15763 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15764
15765 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15766 ins_encode %{
15767 __ count_positives($ary1$$Register, $len$$Register,
15768 $result$$Register, $tmp3$$Register,
15769 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15770 %}
15771 ins_pipe( pipe_slow );
15772 %}
15773
15774 // fast char[] to byte[] compression
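// Compression copies UTF-16 chars to Latin-1 bytes as long as every char fits
// in one byte (<= 0xFF); the result in rax lets the caller detect a char that
// could not be compressed (the exact result convention is defined by
// char_array_compress, not restated here).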
15775 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15776 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15777 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15778 match(Set result (StrCompressedCopy src (Binary dst len)));
15779 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15780 USE_KILL len, KILL tmp5, KILL cr);
15781
15782 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15783 ins_encode %{
15784 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15785 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15786 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15787 knoreg, knoreg);
15788 %}
15789 ins_pipe( pipe_slow );
15790 %}
15791
15792 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15793 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15794 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15795 match(Set result (StrCompressedCopy src (Binary dst len)));
15796 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15797 USE_KILL len, KILL tmp5, KILL cr);
15798
15799 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15800 ins_encode %{
15801 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15802 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15803 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15804 $ktmp1$$KRegister, $ktmp2$$KRegister);
15805 %}
15806 ins_pipe( pipe_slow );
15807 %}

// fast byte[] to char[] inflation
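// Inflation widens each Latin-1 byte to a UTF-16 char; unlike compression it
// cannot fail, so the rule produces no result (Universe dummy).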
15809 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15810 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15811 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15812 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15813 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15814
15815 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15816 ins_encode %{
15817 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15818 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15819 %}
15820 ins_pipe( pipe_slow );
15821 %}
15822
15823 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15824 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15825 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15826 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15827 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15828
15829 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15830 ins_encode %{
15831 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15832 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15833 %}
15834 ins_pipe( pipe_slow );
15835 %}
15836
15837 // encode char[] to byte[] in ISO_8859_1
15838 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15839 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15840 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15841 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15842 match(Set result (EncodeISOArray src (Binary dst len)));
15843 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15844
15845 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15846 ins_encode %{
15847 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15848 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15849 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15850 %}
15851 ins_pipe( pipe_slow );
15852 %}
15853
15854 // encode char[] to byte[] in ASCII
15855 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15856 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15857 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15858 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15859 match(Set result (EncodeISOArray src (Binary dst len)));
15860 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15861
15862 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15863 ins_encode %{
15864 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15865 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15866 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15867 %}
15868 ins_pipe( pipe_slow );
15869 %}
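
// Both encode rules above share the encode_iso_array() masm kernel; the final
// boolean selects the cutoff (ASCII: chars <= 0x7F, ISO-8859-1: chars <= 0xFF).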
15870
15871 //----------Overflow Math Instructions-----------------------------------------
15872
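// These rules produce only a condition-code result: the arithmetic instruction
// is emitted for its effect on the overflow flag (OF), and the flags value is
// consumed by a following branch. A minimal sketch of the intended use,
// assuming the usual Math.addExact(int, int) shape:
//   addl  op1, op2      // OverflowAddI: sets OF on signed overflow
//   jo    overflow_path // consumer of the rFlagsReg result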
15873 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15874 %{
15875 match(Set cr (OverflowAddI op1 op2));
15876 effect(DEF cr, USE_KILL op1, USE op2);
15877
15878 format %{ "addl $op1, $op2\t# overflow check int" %}
15879
15880 ins_encode %{
15881 __ addl($op1$$Register, $op2$$Register);
15882 %}
15883 ins_pipe(ialu_reg_reg);
15884 %}
15885
15886 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15887 %{
15888 match(Set cr (OverflowAddI op1 op2));
15889 effect(DEF cr, USE_KILL op1, USE op2);
15890
15891 format %{ "addl $op1, $op2\t# overflow check int" %}
15892
15893 ins_encode %{
15894 __ addl($op1$$Register, $op2$$constant);
15895 %}
15896 ins_pipe(ialu_reg_reg);
15897 %}
15898
15899 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15900 %{
15901 match(Set cr (OverflowAddL op1 op2));
15902 effect(DEF cr, USE_KILL op1, USE op2);
15903
15904 format %{ "addq $op1, $op2\t# overflow check long" %}
15905 ins_encode %{
15906 __ addq($op1$$Register, $op2$$Register);
15907 %}
15908 ins_pipe(ialu_reg_reg);
15909 %}
15910
15911 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15912 %{
15913 match(Set cr (OverflowAddL op1 op2));
15914 effect(DEF cr, USE_KILL op1, USE op2);
15915
15916 format %{ "addq $op1, $op2\t# overflow check long" %}
15917 ins_encode %{
15918 __ addq($op1$$Register, $op2$$constant);
15919 %}
15920 ins_pipe(ialu_reg_reg);
15921 %}
15922
15923 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15924 %{
15925 match(Set cr (OverflowSubI op1 op2));
15926
15927 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15928 ins_encode %{
15929 __ cmpl($op1$$Register, $op2$$Register);
15930 %}
15931 ins_pipe(ialu_reg_reg);
15932 %}
15933
15934 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15935 %{
15936 match(Set cr (OverflowSubI op1 op2));
15937
15938 format %{ "cmpl $op1, $op2\t# overflow check int" %}
15939 ins_encode %{
15940 __ cmpl($op1$$Register, $op2$$constant);
15941 %}
15942 ins_pipe(ialu_reg_reg);
15943 %}
15944
15945 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15946 %{
15947 match(Set cr (OverflowSubL op1 op2));
15948
15949 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15950 ins_encode %{
15951 __ cmpq($op1$$Register, $op2$$Register);
15952 %}
15953 ins_pipe(ialu_reg_reg);
15954 %}
15955
15956 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15957 %{
15958 match(Set cr (OverflowSubL op1 op2));
15959
15960 format %{ "cmpq $op1, $op2\t# overflow check long" %}
15961 ins_encode %{
15962 __ cmpq($op1$$Register, $op2$$constant);
15963 %}
15964 ins_pipe(ialu_reg_reg);
15965 %}
15966
15967 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15968 %{
15969 match(Set cr (OverflowSubI zero op2));
15970 effect(DEF cr, USE_KILL op2);
15971
15972 format %{ "negl $op2\t# overflow check int" %}
15973 ins_encode %{
15974 __ negl($op2$$Register);
15975 %}
15976 ins_pipe(ialu_reg_reg);
15977 %}
15978
15979 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15980 %{
15981 match(Set cr (OverflowSubL zero op2));
15982 effect(DEF cr, USE_KILL op2);
15983
15984 format %{ "negq $op2\t# overflow check long" %}
15985 ins_encode %{
15986 __ negq($op2$$Register);
15987 %}
15988 ins_pipe(ialu_reg_reg);
15989 %}
15990
15991 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15992 %{
15993 match(Set cr (OverflowMulI op1 op2));
15994 effect(DEF cr, USE_KILL op1, USE op2);
15995
15996 format %{ "imull $op1, $op2\t# overflow check int" %}
15997 ins_encode %{
15998 __ imull($op1$$Register, $op2$$Register);
15999 %}
16000 ins_pipe(ialu_reg_reg_alu0);
16001 %}
16002
16003 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16004 %{
16005 match(Set cr (OverflowMulI op1 op2));
16006 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16007
16008 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16009 ins_encode %{
16010 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16011 %}
16012 ins_pipe(ialu_reg_reg_alu0);
16013 %}
16014
16015 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16016 %{
16017 match(Set cr (OverflowMulL op1 op2));
16018 effect(DEF cr, USE_KILL op1, USE op2);
16019
16020 format %{ "imulq $op1, $op2\t# overflow check long" %}
16021 ins_encode %{
16022 __ imulq($op1$$Register, $op2$$Register);
16023 %}
16024 ins_pipe(ialu_reg_reg_alu0);
16025 %}
16026
16027 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16028 %{
16029 match(Set cr (OverflowMulL op1 op2));
16030 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16031
16032 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16033 ins_encode %{
16034 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16035 %}
16036 ins_pipe(ialu_reg_reg_alu0);
16037 %}
16038
16039
16040 //----------Control Flow Instructions------------------------------------------
16041 // Signed compare Instructions
16042
16043 // XXX more variants!!
16044 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16045 %{
16046 match(Set cr (CmpI op1 op2));
16047 effect(DEF cr, USE op1, USE op2);
16048
16049 format %{ "cmpl $op1, $op2" %}
16050 ins_encode %{
16051 __ cmpl($op1$$Register, $op2$$Register);
16052 %}
16053 ins_pipe(ialu_cr_reg_reg);
16054 %}
16055
16056 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16057 %{
16058 match(Set cr (CmpI op1 op2));
16059
16060 format %{ "cmpl $op1, $op2" %}
16061 ins_encode %{
16062 __ cmpl($op1$$Register, $op2$$constant);
16063 %}
16064 ins_pipe(ialu_cr_reg_imm);
16065 %}
16066
16067 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16068 %{
16069 match(Set cr (CmpI op1 (LoadI op2)));
16070
16071 ins_cost(500); // XXX
16072 format %{ "cmpl $op1, $op2" %}
16073 ins_encode %{
16074 __ cmpl($op1$$Register, $op2$$Address);
16075 %}
16076 ins_pipe(ialu_cr_reg_mem);
16077 %}
16078
16079 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16080 %{
16081 match(Set cr (CmpI src zero));
16082
16083 format %{ "testl $src, $src" %}
16084 ins_encode %{
16085 __ testl($src$$Register, $src$$Register);
16086 %}
16087 ins_pipe(ialu_cr_reg_imm);
16088 %}
16089
16090 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16091 %{
16092 match(Set cr (CmpI (AndI src con) zero));
16093
16094 format %{ "testl $src, $con" %}
16095 ins_encode %{
16096 __ testl($src$$Register, $con$$constant);
16097 %}
16098 ins_pipe(ialu_cr_reg_imm);
16099 %}
16100
16101 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16102 %{
16103 match(Set cr (CmpI (AndI src1 src2) zero));
16104
16105 format %{ "testl $src1, $src2" %}
16106 ins_encode %{
16107 __ testl($src1$$Register, $src2$$Register);
16108 %}
16109 ins_pipe(ialu_cr_reg_imm);
16110 %}
16111
16112 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16113 %{
16114 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16115
16116 format %{ "testl $src, $mem" %}
16117 ins_encode %{
16118 __ testl($src$$Register, $mem$$Address);
16119 %}
16120 ins_pipe(ialu_cr_reg_mem);
16121 %}
16122
16123 // Unsigned compare Instructions; really, same as signed except they
16124 // produce an rFlagsRegU instead of rFlagsReg.
16125 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16126 %{
16127 match(Set cr (CmpU op1 op2));
16128
16129 format %{ "cmpl $op1, $op2\t# unsigned" %}
16130 ins_encode %{
16131 __ cmpl($op1$$Register, $op2$$Register);
16132 %}
16133 ins_pipe(ialu_cr_reg_reg);
16134 %}
16135
16136 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16137 %{
16138 match(Set cr (CmpU op1 op2));
16139
16140 format %{ "cmpl $op1, $op2\t# unsigned" %}
16141 ins_encode %{
16142 __ cmpl($op1$$Register, $op2$$constant);
16143 %}
16144 ins_pipe(ialu_cr_reg_imm);
16145 %}
16146
16147 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16148 %{
16149 match(Set cr (CmpU op1 (LoadI op2)));
16150
16151 ins_cost(500); // XXX
16152 format %{ "cmpl $op1, $op2\t# unsigned" %}
16153 ins_encode %{
16154 __ cmpl($op1$$Register, $op2$$Address);
16155 %}
16156 ins_pipe(ialu_cr_reg_mem);
16157 %}
16158
16159 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16160 %{
16161 match(Set cr (CmpU src zero));
16162
16163 format %{ "testl $src, $src\t# unsigned" %}
16164 ins_encode %{
16165 __ testl($src$$Register, $src$$Register);
16166 %}
16167 ins_pipe(ialu_cr_reg_imm);
16168 %}
16169
16170 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16171 %{
16172 match(Set cr (CmpP op1 op2));
16173
16174 format %{ "cmpq $op1, $op2\t# ptr" %}
16175 ins_encode %{
16176 __ cmpq($op1$$Register, $op2$$Register);
16177 %}
16178 ins_pipe(ialu_cr_reg_reg);
16179 %}
16180
16181 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16182 %{
16183 match(Set cr (CmpP op1 (LoadP op2)));
16184 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16185
16186 ins_cost(500); // XXX
16187 format %{ "cmpq $op1, $op2\t# ptr" %}
16188 ins_encode %{
16189 __ cmpq($op1$$Register, $op2$$Address);
16190 %}
16191 ins_pipe(ialu_cr_reg_mem);
16192 %}
16193
16194 // XXX this is generalized by compP_rReg_mem???
16195 // Compare raw pointer (used in out-of-heap check).
16196 // Only works because non-oop pointers must be raw pointers
16197 // and raw pointers have no anti-dependencies.
16198 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16199 %{
16200 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16201 n->in(2)->as_Load()->barrier_data() == 0);
16202 match(Set cr (CmpP op1 (LoadP op2)));
16203
16204 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16205 ins_encode %{
16206 __ cmpq($op1$$Register, $op2$$Address);
16207 %}
16208 ins_pipe(ialu_cr_reg_mem);
16209 %}
16210
// This will generate a signed flags result. This should be OK since
// any compare against zero is only used for eq/neq tests.
16213 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16214 %{
16215 match(Set cr (CmpP src zero));
16216
16217 format %{ "testq $src, $src\t# ptr" %}
16218 ins_encode %{
16219 __ testq($src$$Register, $src$$Register);
16220 %}
16221 ins_pipe(ialu_cr_reg_imm);
16222 %}
16223
// This will generate a signed flags result. This should be OK since
// any compare against zero is only used for eq/neq tests.
16226 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16227 %{
16228 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16229 n->in(1)->as_Load()->barrier_data() == 0);
16230 match(Set cr (CmpP (LoadP op) zero));
16231
16232 ins_cost(500); // XXX
16233 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16234 ins_encode %{
16235 __ testq($op$$Address, 0xFFFFFFFF);
16236 %}
16237 ins_pipe(ialu_cr_reg_imm);
16238 %}
16239
16240 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16241 %{
16242 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16243 n->in(1)->as_Load()->barrier_data() == 0);
16244 match(Set cr (CmpP (LoadP mem) zero));
16245
16246 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16247 ins_encode %{
16248 __ cmpq(r12, $mem$$Address);
16249 %}
16250 ins_pipe(ialu_cr_reg_mem);
16251 %}
16252
16253 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16254 %{
16255 match(Set cr (CmpN op1 op2));
16256
16257 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16258 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16259 ins_pipe(ialu_cr_reg_reg);
16260 %}
16261
16262 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16263 %{
16264 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16265 match(Set cr (CmpN src (LoadN mem)));
16266
16267 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16268 ins_encode %{
16269 __ cmpl($src$$Register, $mem$$Address);
16270 %}
16271 ins_pipe(ialu_cr_reg_mem);
16272 %}
16273
16274 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16275 match(Set cr (CmpN op1 op2));
16276
16277 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16278 ins_encode %{
16279 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16280 %}
16281 ins_pipe(ialu_cr_reg_imm);
16282 %}
16283
16284 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16285 %{
16286 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16287 match(Set cr (CmpN src (LoadN mem)));
16288
16289 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16290 ins_encode %{
16291 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16292 %}
16293 ins_pipe(ialu_cr_reg_mem);
16294 %}
16295
16296 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16297 match(Set cr (CmpN op1 op2));
16298
16299 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16300 ins_encode %{
16301 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16302 %}
16303 ins_pipe(ialu_cr_reg_imm);
16304 %}
16305
16306 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16307 %{
16308 predicate(!UseCompactObjectHeaders);
16309 match(Set cr (CmpN src (LoadNKlass mem)));
16310
16311 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16312 ins_encode %{
16313 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16314 %}
16315 ins_pipe(ialu_cr_reg_mem);
16316 %}
16317
16318 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16319 match(Set cr (CmpN src zero));
16320
16321 format %{ "testl $src, $src\t# compressed ptr" %}
16322 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16323 ins_pipe(ialu_cr_reg_imm);
16324 %}
16325
16326 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16327 %{
16328 predicate(CompressedOops::base() != nullptr &&
16329 n->in(1)->as_Load()->barrier_data() == 0);
16330 match(Set cr (CmpN (LoadN mem) zero));
16331
16332 ins_cost(500); // XXX
16333 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16334 ins_encode %{
16335 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16336 %}
16337 ins_pipe(ialu_cr_reg_mem);
16338 %}
16339
16340 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16341 %{
16342 predicate(CompressedOops::base() == nullptr &&
16343 n->in(1)->as_Load()->barrier_data() == 0);
16344 match(Set cr (CmpN (LoadN mem) zero));
16345
16346 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16347 ins_encode %{
16348 __ cmpl(r12, $mem$$Address);
16349 %}
16350 ins_pipe(ialu_cr_reg_mem);
16351 %}
16352
16353 // Yanked all unsigned pointer compare operations.
16354 // Pointer compares are done with CmpP which is already unsigned.
16355
16356 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16357 %{
16358 match(Set cr (CmpL op1 op2));
16359
16360 format %{ "cmpq $op1, $op2" %}
16361 ins_encode %{
16362 __ cmpq($op1$$Register, $op2$$Register);
16363 %}
16364 ins_pipe(ialu_cr_reg_reg);
16365 %}
16366
16367 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16368 %{
16369 match(Set cr (CmpL op1 op2));
16370
16371 format %{ "cmpq $op1, $op2" %}
16372 ins_encode %{
16373 __ cmpq($op1$$Register, $op2$$constant);
16374 %}
16375 ins_pipe(ialu_cr_reg_imm);
16376 %}
16377
16378 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16379 %{
16380 match(Set cr (CmpL op1 (LoadL op2)));
16381
16382 format %{ "cmpq $op1, $op2" %}
16383 ins_encode %{
16384 __ cmpq($op1$$Register, $op2$$Address);
16385 %}
16386 ins_pipe(ialu_cr_reg_mem);
16387 %}
16388
16389 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16390 %{
16391 match(Set cr (CmpL src zero));
16392
16393 format %{ "testq $src, $src" %}
16394 ins_encode %{
16395 __ testq($src$$Register, $src$$Register);
16396 %}
16397 ins_pipe(ialu_cr_reg_imm);
16398 %}
16399
16400 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16401 %{
16402 match(Set cr (CmpL (AndL src con) zero));
16403
16404 format %{ "testq $src, $con\t# long" %}
16405 ins_encode %{
16406 __ testq($src$$Register, $con$$constant);
16407 %}
16408 ins_pipe(ialu_cr_reg_imm);
16409 %}
16410
16411 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16412 %{
16413 match(Set cr (CmpL (AndL src1 src2) zero));
16414
16415 format %{ "testq $src1, $src2\t# long" %}
16416 ins_encode %{
16417 __ testq($src1$$Register, $src2$$Register);
16418 %}
16419 ins_pipe(ialu_cr_reg_imm);
16420 %}
16421
16422 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16423 %{
16424 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16425
16426 format %{ "testq $src, $mem" %}
16427 ins_encode %{
16428 __ testq($src$$Register, $mem$$Address);
16429 %}
16430 ins_pipe(ialu_cr_reg_mem);
16431 %}
16432
16433 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16434 %{
16435 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16436
16437 format %{ "testq $src, $mem" %}
16438 ins_encode %{
16439 __ testq($src$$Register, $mem$$Address);
16440 %}
16441 ins_pipe(ialu_cr_reg_mem);
16442 %}
16443
16444 // Manifest a CmpU result in an integer register. Very painful.
16445 // This is the test to avoid.
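// The emitted sequence realizes the usual three-way result, here unsigned:
//   dst = (src1 < src2) ? -1 : (src1 == src2) ? 0 : 1
// i.e. start with -1, keep it on "below", otherwise setne yields 0 on equal
// and 1 on above.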
16446 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16447 %{
16448 match(Set dst (CmpU3 src1 src2));
16449 effect(KILL flags);
16450
16451 ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl, or setzune for APX\n\t"
            "done:" %}
16457 ins_encode %{
16458 Label done;
16459 __ cmpl($src1$$Register, $src2$$Register);
16460 __ movl($dst$$Register, -1);
16461 __ jccb(Assembler::below, done);
16462 __ setcc(Assembler::notZero, $dst$$Register);
16463 __ bind(done);
16464 %}
16465 ins_pipe(pipe_slow);
16466 %}
16467
16468 // Manifest a CmpL result in an integer register. Very painful.
16469 // This is the test to avoid.
16470 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16471 %{
16472 match(Set dst (CmpL3 src1 src2));
16473 effect(KILL flags);
16474
16475 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst\t# emits setne + movzbl, or setzune for APX\n\t"
            "done:" %}
16481 ins_encode %{
16482 Label done;
16483 __ cmpq($src1$$Register, $src2$$Register);
16484 __ movl($dst$$Register, -1);
16485 __ jccb(Assembler::less, done);
16486 __ setcc(Assembler::notZero, $dst$$Register);
16487 __ bind(done);
16488 %}
16489 ins_pipe(pipe_slow);
16490 %}
16491
16492 // Manifest a CmpUL result in an integer register. Very painful.
16493 // This is the test to avoid.
16494 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16495 %{
16496 match(Set dst (CmpUL3 src1 src2));
16497 effect(KILL flags);
16498
16499 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl, or setzune for APX\n\t"
            "done:" %}
16505 ins_encode %{
16506 Label done;
16507 __ cmpq($src1$$Register, $src2$$Register);
16508 __ movl($dst$$Register, -1);
16509 __ jccb(Assembler::below, done);
16510 __ setcc(Assembler::notZero, $dst$$Register);
16511 __ bind(done);
16512 %}
16513 ins_pipe(pipe_slow);
16514 %}
16515
16516 // Unsigned long compare Instructions; really, same as signed long except they
16517 // produce an rFlagsRegU instead of rFlagsReg.
16518 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16519 %{
16520 match(Set cr (CmpUL op1 op2));
16521
16522 format %{ "cmpq $op1, $op2\t# unsigned" %}
16523 ins_encode %{
16524 __ cmpq($op1$$Register, $op2$$Register);
16525 %}
16526 ins_pipe(ialu_cr_reg_reg);
16527 %}
16528
16529 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16530 %{
16531 match(Set cr (CmpUL op1 op2));
16532
16533 format %{ "cmpq $op1, $op2\t# unsigned" %}
16534 ins_encode %{
16535 __ cmpq($op1$$Register, $op2$$constant);
16536 %}
16537 ins_pipe(ialu_cr_reg_imm);
16538 %}
16539
16540 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16541 %{
16542 match(Set cr (CmpUL op1 (LoadL op2)));
16543
16544 format %{ "cmpq $op1, $op2\t# unsigned" %}
16545 ins_encode %{
16546 __ cmpq($op1$$Register, $op2$$Address);
16547 %}
16548 ins_pipe(ialu_cr_reg_mem);
16549 %}
16550
16551 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16552 %{
16553 match(Set cr (CmpUL src zero));
16554
16555 format %{ "testq $src, $src\t# unsigned" %}
16556 ins_encode %{
16557 __ testq($src$$Register, $src$$Register);
16558 %}
16559 ins_pipe(ialu_cr_reg_imm);
16560 %}
16561
16562 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16563 %{
16564 match(Set cr (CmpI (LoadB mem) imm));
16565
16566 ins_cost(125);
16567 format %{ "cmpb $mem, $imm" %}
16568 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16569 ins_pipe(ialu_cr_reg_mem);
16570 %}
16571
16572 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16573 %{
16574 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16575
16576 ins_cost(125);
16577 format %{ "testb $mem, $imm\t# ubyte" %}
16578 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16579 ins_pipe(ialu_cr_reg_mem);
16580 %}
16581
16582 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16583 %{
16584 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16585
16586 ins_cost(125);
16587 format %{ "testb $mem, $imm\t# byte" %}
16588 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16589 ins_pipe(ialu_cr_reg_mem);
16590 %}
16591
16592 //----------Max and Min--------------------------------------------------------
16593 // Min Instructions
16594
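// Without APX, MinI/MaxI expand into a compare followed by a conditional move
// into the accumulating operand. Sketch for min(dst, src):
//   cmpl    dst, src
//   cmovlgt dst, src   // if dst > src, take src
// The APX (NDD) forms below are the three-operand, non-destructive variants.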
16595 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16596 %{
16597 predicate(!UseAPX);
16598 effect(USE_DEF dst, USE src, USE cr);
16599
16600 format %{ "cmovlgt $dst, $src\t# min" %}
16601 ins_encode %{
16602 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16603 %}
16604 ins_pipe(pipe_cmov_reg);
16605 %}
16606
16607 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16608 %{
16609 predicate(UseAPX);
16610 effect(DEF dst, USE src1, USE src2, USE cr);
16611
16612 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16613 ins_encode %{
16614 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16615 %}
16616 ins_pipe(pipe_cmov_reg);
16617 %}
16618
16619 instruct minI_rReg(rRegI dst, rRegI src)
16620 %{
16621 predicate(!UseAPX);
16622 match(Set dst (MinI dst src));
16623
16624 ins_cost(200);
16625 expand %{
16626 rFlagsReg cr;
16627 compI_rReg(cr, dst, src);
16628 cmovI_reg_g(dst, src, cr);
16629 %}
16630 %}
16631
16632 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16633 %{
16634 predicate(UseAPX);
16635 match(Set dst (MinI src1 src2));
16636 effect(DEF dst, USE src1, USE src2);
16637 flag(PD::Flag_ndd_demotable_opr1);
16638
16639 ins_cost(200);
16640 expand %{
16641 rFlagsReg cr;
16642 compI_rReg(cr, src1, src2);
16643 cmovI_reg_g_ndd(dst, src1, src2, cr);
16644 %}
16645 %}
16646
16647 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16648 %{
16649 predicate(!UseAPX);
16650 effect(USE_DEF dst, USE src, USE cr);
16651
16652 format %{ "cmovllt $dst, $src\t# max" %}
16653 ins_encode %{
16654 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16655 %}
16656 ins_pipe(pipe_cmov_reg);
16657 %}
16658
16659 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16660 %{
16661 predicate(UseAPX);
16662 effect(DEF dst, USE src1, USE src2, USE cr);
16663
16664 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16665 ins_encode %{
16666 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16667 %}
16668 ins_pipe(pipe_cmov_reg);
16669 %}
16670
16671 instruct maxI_rReg(rRegI dst, rRegI src)
16672 %{
16673 predicate(!UseAPX);
16674 match(Set dst (MaxI dst src));
16675
16676 ins_cost(200);
16677 expand %{
16678 rFlagsReg cr;
16679 compI_rReg(cr, dst, src);
16680 cmovI_reg_l(dst, src, cr);
16681 %}
16682 %}
16683
16684 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16685 %{
16686 predicate(UseAPX);
16687 match(Set dst (MaxI src1 src2));
16688 effect(DEF dst, USE src1, USE src2);
16689 flag(PD::Flag_ndd_demotable_opr1);
16690
16691 ins_cost(200);
16692 expand %{
16693 rFlagsReg cr;
16694 compI_rReg(cr, src1, src2);
16695 cmovI_reg_l_ndd(dst, src1, src2, cr);
16696 %}
16697 %}
16698
16699 // ============================================================================
16700 // Branch Instructions
16701
16702 // Jump Direct - Label defines a relative address from JMP+1
16703 instruct jmpDir(label labl)
16704 %{
16705 match(Goto);
16706 effect(USE labl);
16707
16708 ins_cost(300);
16709 format %{ "jmp $labl" %}
16710 size(5);
16711 ins_encode %{
16712 Label* L = $labl$$label;
16713 __ jmp(*L, false); // Always long jump
16714 %}
16715 ins_pipe(pipe_jmp);
16716 %}
16717
16718 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16719 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16720 %{
16721 match(If cop cr);
16722 effect(USE labl);
16723
16724 ins_cost(300);
16725 format %{ "j$cop $labl" %}
16726 size(6);
16727 ins_encode %{
16728 Label* L = $labl$$label;
16729 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16730 %}
16731 ins_pipe(pipe_jcc);
16732 %}
16733
16734 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16735 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16736 %{
16737 match(CountedLoopEnd cop cr);
16738 effect(USE labl);
16739
16740 ins_cost(300);
16741 format %{ "j$cop $labl\t# loop end" %}
16742 size(6);
16743 ins_encode %{
16744 Label* L = $labl$$label;
16745 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16746 %}
16747 ins_pipe(pipe_jcc);
16748 %}
16749
16750 // Jump Direct Conditional - using unsigned comparison
16751 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16752 match(If cop cmp);
16753 effect(USE labl);
16754
16755 ins_cost(300);
16756 format %{ "j$cop,u $labl" %}
16757 size(6);
16758 ins_encode %{
16759 Label* L = $labl$$label;
16760 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16761 %}
16762 ins_pipe(pipe_jcc);
16763 %}
16764
16765 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16766 match(If cop cmp);
16767 effect(USE labl);
16768
16769 ins_cost(200);
16770 format %{ "j$cop,u $labl" %}
16771 size(6);
16772 ins_encode %{
16773 Label* L = $labl$$label;
16774 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16775 %}
16776 ins_pipe(pipe_jcc);
16777 %}
16778
16779 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16780 match(If cop cmp);
16781 effect(USE labl);
16782
16783 ins_cost(200);
16784 format %{ $$template
16785 if ($cop$$cmpcode == Assembler::notEqual) {
16786 $$emit$$"jp,u $labl\n\t"
16787 $$emit$$"j$cop,u $labl"
16788 } else {
16789 $$emit$$"jp,u done\n\t"
16790 $$emit$$"j$cop,u $labl\n\t"
16791 $$emit$$"done:"
16792 }
16793 %}
16794 ins_encode %{
16795 Label* l = $labl$$label;
16796 if ($cop$$cmpcode == Assembler::notEqual) {
16797 __ jcc(Assembler::parity, *l, false);
16798 __ jcc(Assembler::notEqual, *l, false);
16799 } else if ($cop$$cmpcode == Assembler::equal) {
16800 Label done;
16801 __ jccb(Assembler::parity, done);
16802 __ jcc(Assembler::equal, *l, false);
16803 __ bind(done);
16804 } else {
16805 ShouldNotReachHere();
16806 }
16807 %}
16808 ins_pipe(pipe_jcc);
16809 %}
16810
16811 // ============================================================================
16812 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16813 // superklass array for an instance of the superklass. Set a hidden
16814 // internal cache on a hit (cache is checked with exposed code in
16815 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16816 // encoding ALSO sets flags.
16817
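// In pseudocode, mirroring the format string below:
//   for (Klass* k : sub->secondary_supers) {   // repne scasq
//     if (k == super) { sub->secondary_super_cache = super; return 0; } // hit
//   }
//   return non-zero;                           // miss, flags left NZ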
16818 instruct partialSubtypeCheck(rdi_RegP result,
16819 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16820 rFlagsReg cr)
16821 %{
16822 match(Set result (PartialSubtypeCheck sub super));
16823 predicate(!UseSecondarySupersTable);
16824 effect(KILL rcx, KILL cr);
16825
16826 ins_cost(1100); // slightly larger than the next version
16827 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16828 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16829 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16830 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16831 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16832 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16833 "xorq $result, $result\t\t Hit: rdi zero\n\t"
16834 "miss:\t" %}
16835
16836 ins_encode %{
16837 Label miss;
16838 // NB: Callers may assume that, when $result is a valid register,
16839 // check_klass_subtype_slow_path_linear sets it to a nonzero
16840 // value.
16841 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16842 $rcx$$Register, $result$$Register,
16843 nullptr, &miss,
16844 /*set_cond_codes:*/ true);
16845 __ xorptr($result$$Register, $result$$Register);
16846 __ bind(miss);
16847 %}
16848
16849 ins_pipe(pipe_slow);
16850 %}
16851
16852 // ============================================================================
16853 // Two versions of hashtable-based partialSubtypeCheck, both used when
16854 // we need to search for a super class in the secondary supers array.
16855 // The first is used when we don't know _a priori_ the class being
16856 // searched for. The second, far more common, is used when we do know:
16857 // this is used for instanceof, checkcast, and any case where C2 can
16858 // determine it by constant propagation.
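// For the constant-super case the hash slot is computed at compile time from
// the Klass constant; when InlineSecondarySupersTest is off, the lookup is
// done via a per-slot stub call instead of inline code (see the encoding
// below).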
16859
16860 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16861 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16862 rFlagsReg cr)
16863 %{
16864 match(Set result (PartialSubtypeCheck sub super));
16865 predicate(UseSecondarySupersTable);
16866 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16867
16868 ins_cost(1000);
16869 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16870
16871 ins_encode %{
16872 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16873 $temp3$$Register, $temp4$$Register, $result$$Register);
16874 %}
16875
16876 ins_pipe(pipe_slow);
16877 %}
16878
16879 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
16880 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16881 rFlagsReg cr)
16882 %{
16883 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
16884 predicate(UseSecondarySupersTable);
16885 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16886
16887 ins_cost(700); // smaller than the next version
16888 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
16889
16890 ins_encode %{
16891 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
16892 if (InlineSecondarySupersTest) {
16893 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
16894 $temp3$$Register, $temp4$$Register, $result$$Register,
16895 super_klass_slot);
16896 } else {
16897 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
16898 }
16899 %}
16900
16901 ins_pipe(pipe_slow);
16902 %}
16903
16904 // ============================================================================
16905 // Branch Instructions -- short offset versions
16906 //
16907 // These instructions are used to replace jumps of a long offset (the default
16908 // match) with jumps of a shorter offset. These instructions are all tagged
16909 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16910 // match rules in general matching. Instead, the ADLC generates a conversion
16911 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler determines whether a
// branch target is within short-offset range using the
// is_short_branch_offset() predicate in the machine specific code section of
// the file.
16915
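// For reference, the long forms above are size(5) (jmp rel32: E9 + 4-byte
// offset) and size(6) (jcc rel32: 0F 8x + 4-byte offset), while the short
// forms below are size(2) (rel8 encodings).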
16916 // Jump Direct - Label defines a relative address from JMP+1
16917 instruct jmpDir_short(label labl) %{
16918 match(Goto);
16919 effect(USE labl);
16920
16921 ins_cost(300);
16922 format %{ "jmp,s $labl" %}
16923 size(2);
16924 ins_encode %{
16925 Label* L = $labl$$label;
16926 __ jmpb(*L);
16927 %}
16928 ins_pipe(pipe_jmp);
16929 ins_short_branch(1);
16930 %}
16931
16932 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16933 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
16934 match(If cop cr);
16935 effect(USE labl);
16936
16937 ins_cost(300);
16938 format %{ "j$cop,s $labl" %}
16939 size(2);
16940 ins_encode %{
16941 Label* L = $labl$$label;
16942 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16943 %}
16944 ins_pipe(pipe_jcc);
16945 ins_short_branch(1);
16946 %}
16947
16948 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16949 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
16950 match(CountedLoopEnd cop cr);
16951 effect(USE labl);
16952
16953 ins_cost(300);
16954 format %{ "j$cop,s $labl\t# loop end" %}
16955 size(2);
16956 ins_encode %{
16957 Label* L = $labl$$label;
16958 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16959 %}
16960 ins_pipe(pipe_jcc);
16961 ins_short_branch(1);
16962 %}
16963
16964 // Jump Direct Conditional - using unsigned comparison
16965 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16966 match(If cop cmp);
16967 effect(USE labl);
16968
16969 ins_cost(300);
16970 format %{ "j$cop,us $labl" %}
16971 size(2);
16972 ins_encode %{
16973 Label* L = $labl$$label;
16974 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16975 %}
16976 ins_pipe(pipe_jcc);
16977 ins_short_branch(1);
16978 %}
16979
16980 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16981 match(If cop cmp);
16982 effect(USE labl);
16983
16984 ins_cost(300);
16985 format %{ "j$cop,us $labl" %}
16986 size(2);
16987 ins_encode %{
16988 Label* L = $labl$$label;
16989 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
16990 %}
16991 ins_pipe(pipe_jcc);
16992 ins_short_branch(1);
16993 %}
16994
16995 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16996 match(If cop cmp);
16997 effect(USE labl);
16998
16999 ins_cost(300);
17000 format %{ $$template
17001 if ($cop$$cmpcode == Assembler::notEqual) {
17002 $$emit$$"jp,u,s $labl\n\t"
17003 $$emit$$"j$cop,u,s $labl"
17004 } else {
17005 $$emit$$"jp,u,s done\n\t"
17006 $$emit$$"j$cop,u,s $labl\n\t"
17007 $$emit$$"done:"
17008 }
17009 %}
17010 size(4);
17011 ins_encode %{
17012 Label* l = $labl$$label;
17013 if ($cop$$cmpcode == Assembler::notEqual) {
17014 __ jccb(Assembler::parity, *l);
17015 __ jccb(Assembler::notEqual, *l);
17016 } else if ($cop$$cmpcode == Assembler::equal) {
17017 Label done;
17018 __ jccb(Assembler::parity, done);
17019 __ jccb(Assembler::equal, *l);
17020 __ bind(done);
17021 } else {
17022 ShouldNotReachHere();
17023 }
17024 %}
17025 ins_pipe(pipe_jcc);
17026 ins_short_branch(1);
17027 %}
17028
17029 // ============================================================================
17030 // inlined locking and unlocking
17031
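// Both rules produce only a flags result: the fast path sets ZF on success,
// and the consuming If branches to the runtime slow path (monitorenter /
// monitorexit) when the inline attempt fails. (The slow-path dispatch is not
// emitted here; this is an assumption about the surrounding graph.)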
17032 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17033 match(Set cr (FastLock object box));
17034 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17035 ins_cost(300);
17036 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17037 ins_encode %{
17038 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17039 %}
17040 ins_pipe(pipe_slow);
17041 %}
17042
17043 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17044 match(Set cr (FastUnlock object rax_reg));
17045 effect(TEMP tmp, USE_KILL rax_reg);
17046 ins_cost(300);
17047 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17048 ins_encode %{
17049 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17050 %}
17051 ins_pipe(pipe_slow);
17052 %}
17053
17054
17055 // ============================================================================
17056 // Safepoint Instructions
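// The $poll operand holds the thread-local polling word (read from the
// current JavaThread). To request a safepoint the VM arms the poll by making
// the page unreadable, so the 'testl rax, [$poll]' load below traps; the
// poll_type relocation lets the signal handler map the faulting pc back to
// this safepoint.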
17057 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17058 %{
17059 match(SafePoint poll);
17060 effect(KILL cr, USE poll);
17061
17062 format %{ "testl rax, [$poll]\t"
17063 "# Safepoint: poll for GC" %}
17064 ins_cost(125);
17065 ins_encode %{
17066 __ relocate(relocInfo::poll_type);
17067 address pre_pc = __ pc();
17068 __ testl(rax, Address($poll$$Register, 0));
17069 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17070 %}
17071 ins_pipe(ialu_reg_mem);
17072 %}
17073
17074 instruct mask_all_evexL(kReg dst, rRegL src) %{
17075 match(Set dst (MaskAll src));
17076 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17077 ins_encode %{
17078 int mask_len = Matcher::vector_length(this);
17079 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17080 %}
17081 ins_pipe( pipe_slow );
17082 %}
17083
17084 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17085 predicate(Matcher::vector_length(n) > 32);
17086 match(Set dst (MaskAll src));
17087 effect(TEMP tmp);
17088 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17089 ins_encode %{
17090 int mask_len = Matcher::vector_length(this);
17091 __ movslq($tmp$$Register, $src$$Register);
17092 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17093 %}
17094 ins_pipe( pipe_slow );
17095 %}
17096
17097 // ============================================================================
17098 // Procedure Call/Return Instructions
17099 // Call Java Static Instruction
17100 // Note: If this code changes, the corresponding ret_addr_offset() and
17101 // compute_padding() functions will have to be adjusted.
17102 instruct CallStaticJavaDirect(method meth) %{
17103 match(CallStaticJava);
17104 effect(USE meth);
17105
17106 ins_cost(300);
17107 format %{ "call,static " %}
17108 opcode(0xE8); /* E8 cd */
17109 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17110 ins_pipe(pipe_slow);
17111 ins_alignment(4);
17112 %}
17113
17114 // Call Java Dynamic Instruction
17115 // Note: If this code changes, the corresponding ret_addr_offset() and
17116 // compute_padding() functions will have to be adjusted.
17117 instruct CallDynamicJavaDirect(method meth)
17118 %{
17119 match(CallDynamicJava);
17120 effect(USE meth);
17121
17122 ins_cost(300);
17123 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17124 "call,dynamic " %}
17125 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17126 ins_pipe(pipe_slow);
17127 ins_alignment(4);
17128 %}
17129
17130 // Call Runtime Instruction
17131 instruct CallRuntimeDirect(method meth)
17132 %{
17133 match(CallRuntime);
17134 effect(USE meth);
17135
17136 ins_cost(300);
17137 format %{ "call,runtime " %}
17138 ins_encode(clear_avx, Java_To_Runtime(meth));
17139 ins_pipe(pipe_slow);
17140 %}
17141
17142 // Call runtime without safepoint
17143 instruct CallLeafDirect(method meth)
17144 %{
17145 match(CallLeaf);
17146 effect(USE meth);
17147
17148 ins_cost(300);
17149 format %{ "call_leaf,runtime " %}
17150 ins_encode(clear_avx, Java_To_Runtime(meth));
17151 ins_pipe(pipe_slow);
17152 %}
17153
17154 // Call runtime without safepoint and with vector arguments
17155 instruct CallLeafDirectVector(method meth)
17156 %{
17157 match(CallLeafVector);
17158 effect(USE meth);
17159
17160 ins_cost(300);
17161 format %{ "call_leaf,vector " %}
17162 ins_encode(Java_To_Runtime(meth));
17163 ins_pipe(pipe_slow);
17164 %}
17165
17166 // Call runtime without safepoint
17167 instruct CallLeafNoFPDirect(method meth)
17168 %{
17169 match(CallLeafNoFP);
17170 effect(USE meth);
17171
17172 ins_cost(300);
17173 format %{ "call_leaf_nofp,runtime " %}
17174 ins_encode(clear_avx, Java_To_Runtime(meth));
17175 ins_pipe(pipe_slow);
17176 %}
17177
17178 // Return Instruction
17179 // Remove the return address & jump to it.
17180 // Notice: We always emit a nop after a ret to make sure there is room
17181 // for safepoint patching
17182 instruct Ret()
17183 %{
17184 match(Return);
17185
17186 format %{ "ret" %}
17187 ins_encode %{
17188 __ ret(0);
17189 %}
17190 ins_pipe(pipe_jmp);
17191 %}
17192
17193 // Tail Call; Jump from runtime stub to Java code.
17194 // Also known as an 'interprocedural jump'.
17195 // Target of jump will eventually return to caller.
17196 // TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall, which has reset rbp to the caller's state.
17199 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17200 %{
17201 match(TailCall jump_target method_ptr);
17202
17203 ins_cost(300);
17204 format %{ "jmp $jump_target\t# rbx holds method" %}
17205 ins_encode %{
17206 __ jmp($jump_target$$Register);
17207 %}
17208 ins_pipe(pipe_jmp);
17209 %}
17210
17211 // Tail Jump; remove the return address; jump to target.
17212 // TailCall above leaves the return address around.
17213 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17214 %{
17215 match(TailJump jump_target ex_oop);
17216
17217 ins_cost(300);
17218 format %{ "popq rdx\t# pop return address\n\t"
17219 "jmp $jump_target" %}
17220 ins_encode %{
17221 __ popq(as_Register(RDX_enc));
17222 __ jmp($jump_target$$Register);
17223 %}
17224 ins_pipe(pipe_jmp);
17225 %}
17226
17227 // Forward exception.
17228 instruct ForwardExceptionjmp()
17229 %{
17230 match(ForwardException);
17231
17232 format %{ "jmp forward_exception_stub" %}
17233 ins_encode %{
17234 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17235 %}
17236 ins_pipe(pipe_jmp);
17237 %}
17238
17239 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler and is set up just
// prior to jumping to this handler. No code is emitted.
17242 instruct CreateException(rax_RegP ex_oop)
17243 %{
17244 match(Set ex_oop (CreateEx));
17245
17246 size(0);
17247 // use the following format syntax
17248 format %{ "# exception oop is in rax; no code emitted" %}
17249 ins_encode();
17250 ins_pipe(empty);
17251 %}
17252
17253 // Rethrow exception:
17254 // The exception oop will come in the first argument position.
17255 // Then JUMP (not call) to the rethrow stub code.
17256 instruct RethrowException()
17257 %{
17258 match(Rethrow);
17259
17260 // use the following format syntax
17261 format %{ "jmp rethrow_stub" %}
17262 ins_encode %{
17263 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17264 %}
17265 ins_pipe(pipe_jmp);
17266 %}
17267
17268 // ============================================================================
17269 // This name is KNOWN by the ADLC and cannot be changed.
17270 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17271 // for this guy.
17272 instruct tlsLoadP(r15_RegP dst) %{
17273 match(Set dst (ThreadLocal));
17274 effect(DEF dst);
17275
17276 size(0);
17277 format %{ "# TLS is in R15" %}
17278 ins_encode( /*empty encoding*/ );
17279 ins_pipe(ialu_reg_reg);
17280 %}
17281
17282 instruct addF_reg(regF dst, regF src) %{
17283 predicate(UseAVX == 0);
17284 match(Set dst (AddF dst src));
17285
17286 format %{ "addss $dst, $src" %}
17287 ins_cost(150);
17288 ins_encode %{
17289 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17290 %}
17291 ins_pipe(pipe_slow);
17292 %}
17293
17294 instruct addF_mem(regF dst, memory src) %{
17295 predicate(UseAVX == 0);
17296 match(Set dst (AddF dst (LoadF src)));
17297
17298 format %{ "addss $dst, $src" %}
17299 ins_cost(150);
17300 ins_encode %{
17301 __ addss($dst$$XMMRegister, $src$$Address);
17302 %}
17303 ins_pipe(pipe_slow);
17304 %}
17305
17306 instruct addF_imm(regF dst, immF con) %{
17307 predicate(UseAVX == 0);
17308 match(Set dst (AddF dst con));
17309 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17310 ins_cost(150);
17311 ins_encode %{
17312 __ addss($dst$$XMMRegister, $constantaddress($con));
17313 %}
17314 ins_pipe(pipe_slow);
17315 %}
17316
17317 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17318 predicate(UseAVX > 0);
17319 match(Set dst (AddF src1 src2));
17320
17321 format %{ "vaddss $dst, $src1, $src2" %}
17322 ins_cost(150);
17323 ins_encode %{
17324 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17325 %}
17326 ins_pipe(pipe_slow);
17327 %}
17328
17329 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17330 predicate(UseAVX > 0);
17331 match(Set dst (AddF src1 (LoadF src2)));
17332
17333 format %{ "vaddss $dst, $src1, $src2" %}
17334 ins_cost(150);
17335 ins_encode %{
17336 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17337 %}
17338 ins_pipe(pipe_slow);
17339 %}
17340
17341 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17342 predicate(UseAVX > 0);
17343 match(Set dst (AddF src con));
17344
17345 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17346 ins_cost(150);
17347 ins_encode %{
17348 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17349 %}
17350 ins_pipe(pipe_slow);
17351 %}
17352
17353 instruct addD_reg(regD dst, regD src) %{
17354 predicate(UseAVX == 0);
17355 match(Set dst (AddD dst src));
17356
17357 format %{ "addsd $dst, $src" %}
17358 ins_cost(150);
17359 ins_encode %{
17360 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17361 %}
17362 ins_pipe(pipe_slow);
17363 %}
17364
17365 instruct addD_mem(regD dst, memory src) %{
17366 predicate(UseAVX == 0);
17367 match(Set dst (AddD dst (LoadD src)));
17368
17369 format %{ "addsd $dst, $src" %}
17370 ins_cost(150);
17371 ins_encode %{
17372 __ addsd($dst$$XMMRegister, $src$$Address);
17373 %}
17374 ins_pipe(pipe_slow);
17375 %}
17376
17377 instruct addD_imm(regD dst, immD con) %{
17378 predicate(UseAVX == 0);
17379 match(Set dst (AddD dst con));
17380 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17381 ins_cost(150);
17382 ins_encode %{
17383 __ addsd($dst$$XMMRegister, $constantaddress($con));
17384 %}
17385 ins_pipe(pipe_slow);
17386 %}
17387
17388 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17389 predicate(UseAVX > 0);
17390 match(Set dst (AddD src1 src2));
17391
17392 format %{ "vaddsd $dst, $src1, $src2" %}
17393 ins_cost(150);
17394 ins_encode %{
17395 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17396 %}
17397 ins_pipe(pipe_slow);
17398 %}
17399
17400 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17401 predicate(UseAVX > 0);
17402 match(Set dst (AddD src1 (LoadD src2)));
17403
17404 format %{ "vaddsd $dst, $src1, $src2" %}
17405 ins_cost(150);
17406 ins_encode %{
17407 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17408 %}
17409 ins_pipe(pipe_slow);
17410 %}
17411
17412 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17413 predicate(UseAVX > 0);
17414 match(Set dst (AddD src con));
17415
17416 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17417 ins_cost(150);
17418 ins_encode %{
17419 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17420 %}
17421 ins_pipe(pipe_slow);
17422 %}
17423
17424 instruct subF_reg(regF dst, regF src) %{
17425 predicate(UseAVX == 0);
17426 match(Set dst (SubF dst src));
17427
17428 format %{ "subss $dst, $src" %}
17429 ins_cost(150);
17430 ins_encode %{
17431 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17432 %}
17433 ins_pipe(pipe_slow);
17434 %}
17435
17436 instruct subF_mem(regF dst, memory src) %{
17437 predicate(UseAVX == 0);
17438 match(Set dst (SubF dst (LoadF src)));
17439
17440 format %{ "subss $dst, $src" %}
17441 ins_cost(150);
17442 ins_encode %{
17443 __ subss($dst$$XMMRegister, $src$$Address);
17444 %}
17445 ins_pipe(pipe_slow);
17446 %}
17447
17448 instruct subF_imm(regF dst, immF con) %{
17449 predicate(UseAVX == 0);
17450 match(Set dst (SubF dst con));
17451 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17452 ins_cost(150);
17453 ins_encode %{
17454 __ subss($dst$$XMMRegister, $constantaddress($con));
17455 %}
17456 ins_pipe(pipe_slow);
17457 %}
17458
17459 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17460 predicate(UseAVX > 0);
17461 match(Set dst (SubF src1 src2));
17462
17463 format %{ "vsubss $dst, $src1, $src2" %}
17464 ins_cost(150);
17465 ins_encode %{
17466 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17467 %}
17468 ins_pipe(pipe_slow);
17469 %}
17470
17471 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17472 predicate(UseAVX > 0);
17473 match(Set dst (SubF src1 (LoadF src2)));
17474
17475 format %{ "vsubss $dst, $src1, $src2" %}
17476 ins_cost(150);
17477 ins_encode %{
17478 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17479 %}
17480 ins_pipe(pipe_slow);
17481 %}
17482
17483 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17484 predicate(UseAVX > 0);
17485 match(Set dst (SubF src con));
17486
17487 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17488 ins_cost(150);
17489 ins_encode %{
17490 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17491 %}
17492 ins_pipe(pipe_slow);
17493 %}
17494
17495 instruct subD_reg(regD dst, regD src) %{
17496 predicate(UseAVX == 0);
17497 match(Set dst (SubD dst src));
17498
17499 format %{ "subsd $dst, $src" %}
17500 ins_cost(150);
17501 ins_encode %{
17502 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17503 %}
17504 ins_pipe(pipe_slow);
17505 %}
17506
17507 instruct subD_mem(regD dst, memory src) %{
17508 predicate(UseAVX == 0);
17509 match(Set dst (SubD dst (LoadD src)));
17510
17511 format %{ "subsd $dst, $src" %}
17512 ins_cost(150);
17513 ins_encode %{
17514 __ subsd($dst$$XMMRegister, $src$$Address);
17515 %}
17516 ins_pipe(pipe_slow);
17517 %}
17518
17519 instruct subD_imm(regD dst, immD con) %{
17520 predicate(UseAVX == 0);
17521 match(Set dst (SubD dst con));
17522 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17523 ins_cost(150);
17524 ins_encode %{
17525 __ subsd($dst$$XMMRegister, $constantaddress($con));
17526 %}
17527 ins_pipe(pipe_slow);
17528 %}
17529
17530 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17531 predicate(UseAVX > 0);
17532 match(Set dst (SubD src1 src2));
17533
17534 format %{ "vsubsd $dst, $src1, $src2" %}
17535 ins_cost(150);
17536 ins_encode %{
17537 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17538 %}
17539 ins_pipe(pipe_slow);
17540 %}
17541
17542 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17543 predicate(UseAVX > 0);
17544 match(Set dst (SubD src1 (LoadD src2)));
17545
17546 format %{ "vsubsd $dst, $src1, $src2" %}
17547 ins_cost(150);
17548 ins_encode %{
17549 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17550 %}
17551 ins_pipe(pipe_slow);
17552 %}
17553
17554 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17555 predicate(UseAVX > 0);
17556 match(Set dst (SubD src con));
17557
17558 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17559 ins_cost(150);
17560 ins_encode %{
17561 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17562 %}
17563 ins_pipe(pipe_slow);
17564 %}
17565
17566 instruct mulF_reg(regF dst, regF src) %{
17567 predicate(UseAVX == 0);
17568 match(Set dst (MulF dst src));
17569
17570 format %{ "mulss $dst, $src" %}
17571 ins_cost(150);
17572 ins_encode %{
17573 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17574 %}
17575 ins_pipe(pipe_slow);
17576 %}
17577
17578 instruct mulF_mem(regF dst, memory src) %{
17579 predicate(UseAVX == 0);
17580 match(Set dst (MulF dst (LoadF src)));
17581
17582 format %{ "mulss $dst, $src" %}
17583 ins_cost(150);
17584 ins_encode %{
17585 __ mulss($dst$$XMMRegister, $src$$Address);
17586 %}
17587 ins_pipe(pipe_slow);
17588 %}
17589
17590 instruct mulF_imm(regF dst, immF con) %{
17591 predicate(UseAVX == 0);
17592 match(Set dst (MulF dst con));
17593 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17594 ins_cost(150);
17595 ins_encode %{
17596 __ mulss($dst$$XMMRegister, $constantaddress($con));
17597 %}
17598 ins_pipe(pipe_slow);
17599 %}
17600
17601 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17602 predicate(UseAVX > 0);
17603 match(Set dst (MulF src1 src2));
17604
17605 format %{ "vmulss $dst, $src1, $src2" %}
17606 ins_cost(150);
17607 ins_encode %{
17608 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17609 %}
17610 ins_pipe(pipe_slow);
17611 %}
17612
17613 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17614 predicate(UseAVX > 0);
17615 match(Set dst (MulF src1 (LoadF src2)));
17616
17617 format %{ "vmulss $dst, $src1, $src2" %}
17618 ins_cost(150);
17619 ins_encode %{
17620 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17621 %}
17622 ins_pipe(pipe_slow);
17623 %}
17624
17625 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17626 predicate(UseAVX > 0);
17627 match(Set dst (MulF src con));
17628
17629 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17630 ins_cost(150);
17631 ins_encode %{
17632 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17633 %}
17634 ins_pipe(pipe_slow);
17635 %}
17636
17637 instruct mulD_reg(regD dst, regD src) %{
17638 predicate(UseAVX == 0);
17639 match(Set dst (MulD dst src));
17640
17641 format %{ "mulsd $dst, $src" %}
17642 ins_cost(150);
17643 ins_encode %{
17644 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17645 %}
17646 ins_pipe(pipe_slow);
17647 %}
17648
17649 instruct mulD_mem(regD dst, memory src) %{
17650 predicate(UseAVX == 0);
17651 match(Set dst (MulD dst (LoadD src)));
17652
17653 format %{ "mulsd $dst, $src" %}
17654 ins_cost(150);
17655 ins_encode %{
17656 __ mulsd($dst$$XMMRegister, $src$$Address);
17657 %}
17658 ins_pipe(pipe_slow);
17659 %}
17660
17661 instruct mulD_imm(regD dst, immD con) %{
17662 predicate(UseAVX == 0);
17663 match(Set dst (MulD dst con));
17664 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17665 ins_cost(150);
17666 ins_encode %{
17667 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17668 %}
17669 ins_pipe(pipe_slow);
17670 %}
17671
17672 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17673 predicate(UseAVX > 0);
17674 match(Set dst (MulD src1 src2));
17675
17676 format %{ "vmulsd $dst, $src1, $src2" %}
17677 ins_cost(150);
17678 ins_encode %{
17679 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17680 %}
17681 ins_pipe(pipe_slow);
17682 %}
17683
17684 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17685 predicate(UseAVX > 0);
17686 match(Set dst (MulD src1 (LoadD src2)));
17687
17688 format %{ "vmulsd $dst, $src1, $src2" %}
17689 ins_cost(150);
17690 ins_encode %{
17691 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17692 %}
17693 ins_pipe(pipe_slow);
17694 %}
17695
17696 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17697 predicate(UseAVX > 0);
17698 match(Set dst (MulD src con));
17699
17700 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17701 ins_cost(150);
17702 ins_encode %{
17703 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17704 %}
17705 ins_pipe(pipe_slow);
17706 %}
17707
17708 instruct divF_reg(regF dst, regF src) %{
17709 predicate(UseAVX == 0);
17710 match(Set dst (DivF dst src));
17711
17712 format %{ "divss $dst, $src" %}
17713 ins_cost(150);
17714 ins_encode %{
17715 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17716 %}
17717 ins_pipe(pipe_slow);
17718 %}
17719
17720 instruct divF_mem(regF dst, memory src) %{
17721 predicate(UseAVX == 0);
17722 match(Set dst (DivF dst (LoadF src)));
17723
17724 format %{ "divss $dst, $src" %}
17725 ins_cost(150);
17726 ins_encode %{
17727 __ divss($dst$$XMMRegister, $src$$Address);
17728 %}
17729 ins_pipe(pipe_slow);
17730 %}
17731
17732 instruct divF_imm(regF dst, immF con) %{
17733 predicate(UseAVX == 0);
17734 match(Set dst (DivF dst con));
17735 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17736 ins_cost(150);
17737 ins_encode %{
17738 __ divss($dst$$XMMRegister, $constantaddress($con));
17739 %}
17740 ins_pipe(pipe_slow);
17741 %}
17742
17743 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17744 predicate(UseAVX > 0);
17745 match(Set dst (DivF src1 src2));
17746
17747 format %{ "vdivss $dst, $src1, $src2" %}
17748 ins_cost(150);
17749 ins_encode %{
17750 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17751 %}
17752 ins_pipe(pipe_slow);
17753 %}
17754
17755 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17756 predicate(UseAVX > 0);
17757 match(Set dst (DivF src1 (LoadF src2)));
17758
17759 format %{ "vdivss $dst, $src1, $src2" %}
17760 ins_cost(150);
17761 ins_encode %{
17762 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17763 %}
17764 ins_pipe(pipe_slow);
17765 %}
17766
17767 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17768 predicate(UseAVX > 0);
17769 match(Set dst (DivF src con));
17770
17771 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17772 ins_cost(150);
17773 ins_encode %{
17774 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17775 %}
17776 ins_pipe(pipe_slow);
17777 %}
17778
17779 instruct divD_reg(regD dst, regD src) %{
17780 predicate(UseAVX == 0);
17781 match(Set dst (DivD dst src));
17782
17783 format %{ "divsd $dst, $src" %}
17784 ins_cost(150);
17785 ins_encode %{
17786 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17787 %}
17788 ins_pipe(pipe_slow);
17789 %}
17790
17791 instruct divD_mem(regD dst, memory src) %{
17792 predicate(UseAVX == 0);
17793 match(Set dst (DivD dst (LoadD src)));
17794
17795 format %{ "divsd $dst, $src" %}
17796 ins_cost(150);
17797 ins_encode %{
17798 __ divsd($dst$$XMMRegister, $src$$Address);
17799 %}
17800 ins_pipe(pipe_slow);
17801 %}
17802
17803 instruct divD_imm(regD dst, immD con) %{
17804 predicate(UseAVX == 0);
17805 match(Set dst (DivD dst con));
17806 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17807 ins_cost(150);
17808 ins_encode %{
17809 __ divsd($dst$$XMMRegister, $constantaddress($con));
17810 %}
17811 ins_pipe(pipe_slow);
17812 %}
17813
17814 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17815 predicate(UseAVX > 0);
17816 match(Set dst (DivD src1 src2));
17817
17818 format %{ "vdivsd $dst, $src1, $src2" %}
17819 ins_cost(150);
17820 ins_encode %{
17821 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17822 %}
17823 ins_pipe(pipe_slow);
17824 %}
17825
17826 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17827 predicate(UseAVX > 0);
17828 match(Set dst (DivD src1 (LoadD src2)));
17829
17830 format %{ "vdivsd $dst, $src1, $src2" %}
17831 ins_cost(150);
17832 ins_encode %{
17833 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17834 %}
17835 ins_pipe(pipe_slow);
17836 %}
17837
17838 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17839 predicate(UseAVX > 0);
17840 match(Set dst (DivD src con));
17841
17842 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17843 ins_cost(150);
17844 ins_encode %{
17845 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17846 %}
17847 ins_pipe(pipe_slow);
17848 %}
17849
17850 instruct absF_reg(regF dst) %{
17851 predicate(UseAVX == 0);
17852 match(Set dst (AbsF dst));
17853 ins_cost(150);
17854 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17855 ins_encode %{
17856 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17857 %}
17858 ins_pipe(pipe_slow);
17859 %}
17860
17861 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
17862 predicate(UseAVX > 0);
17863 match(Set dst (AbsF src));
17864 ins_cost(150);
17865 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17866 ins_encode %{
17867 int vlen_enc = Assembler::AVX_128bit;
17868 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17869 ExternalAddress(float_signmask()), vlen_enc);
17870 %}
17871 ins_pipe(pipe_slow);
17872 %}
17873
17874 instruct absD_reg(regD dst) %{
17875 predicate(UseAVX == 0);
17876 match(Set dst (AbsD dst));
17877 ins_cost(150);
17878 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
17879 "# abs double by sign masking" %}
17880 ins_encode %{
17881 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17882 %}
17883 ins_pipe(pipe_slow);
17884 %}
17885
17886 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
17887 predicate(UseAVX > 0);
17888 match(Set dst (AbsD src));
17889 ins_cost(150);
17890 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
17891 "# abs double by sign masking" %}
17892 ins_encode %{
17893 int vlen_enc = Assembler::AVX_128bit;
17894 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17895 ExternalAddress(double_signmask()), vlen_enc);
17896 %}
17897 ins_pipe(pipe_slow);
17898 %}
17899
17900 instruct negF_reg(regF dst) %{
17901 predicate(UseAVX == 0);
17902 match(Set dst (NegF dst));
17903 ins_cost(150);
17904 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
17905 ins_encode %{
17906 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17907 %}
17908 ins_pipe(pipe_slow);
17909 %}
17910
17911 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
17912 predicate(UseAVX > 0);
17913 match(Set dst (NegF src));
17914 ins_cost(150);
17915 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17916 ins_encode %{
17917 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17918 ExternalAddress(float_signflip()));
17919 %}
17920 ins_pipe(pipe_slow);
17921 %}
17922
17923 instruct negD_reg(regD dst) %{
17924 predicate(UseAVX == 0);
17925 match(Set dst (NegD dst));
17926 ins_cost(150);
17927 format %{ "xorpd $dst, [0x8000000000000000]\t"
17928 "# neg double by sign flipping" %}
17929 ins_encode %{
17930 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17931 %}
17932 ins_pipe(pipe_slow);
17933 %}
17934
17935 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
17936 predicate(UseAVX > 0);
17937 match(Set dst (NegD src));
17938 ins_cost(150);
17939 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
17940 "# neg double by sign flipping" %}
17941 ins_encode %{
17942 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17943 ExternalAddress(double_signflip()));
17944 %}
17945 ins_pipe(pipe_slow);
17946 %}
17947
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance (it writes only the low 32 bits, leaving a dependency
// on the register's stale upper bits). Therefore only the instruct rule where
// the input is pre-loaded into the dst register is defined below.
17950 instruct sqrtF_reg(regF dst) %{
17951 match(Set dst (SqrtF dst));
17952 format %{ "sqrtss $dst, $dst" %}
17953 ins_encode %{
17954 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17955 %}
17956 ins_pipe(pipe_slow);
17957 %}
17958
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance (it writes only the low 64 bits, leaving a dependency
// on the register's stale upper bits). Therefore only the instruct rule where
// the input is pre-loaded into the dst register is defined below.
17961 instruct sqrtD_reg(regD dst) %{
17962 match(Set dst (SqrtD dst));
17963 format %{ "sqrtsd $dst, $dst" %}
17964 ins_encode %{
17965 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17966 %}
17967 ins_pipe(pipe_slow);
17968 %}
17969
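// Float <-> float16 conversions. The 0x04 immediate passed to vcvtps2ph and
// evcvtps2ph in the rules below sets imm8 bit 2, which selects rounding per
// MXCSR rather than a static mode; the JVM keeps MXCSR at
// round-to-nearest-even.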
17970 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
17971 effect(TEMP tmp);
17972 match(Set dst (ConvF2HF src));
17973 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
17975 ins_encode %{
17976 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17977 %}
17978 ins_pipe( pipe_slow );
17979 %}
17980
17981 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
17982 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17983 effect(TEMP ktmp, TEMP rtmp);
17984 match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
17986 ins_encode %{
17987 __ movl($rtmp$$Register, 0x1);
17988 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
17989 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17990 %}
17991 ins_pipe( pipe_slow );
17992 %}
17993
17994 instruct vconvF2HF(vec dst, vec src) %{
17995 match(Set dst (VectorCastF2HF src));
17996 format %{ "vector_conv_F2HF $dst $src" %}
17997 ins_encode %{
17998 int vlen_enc = vector_length_encoding(this, $src);
17999 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18000 %}
18001 ins_pipe( pipe_slow );
18002 %}
18003
18004 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18005 predicate(n->as_StoreVector()->memory_size() >= 16);
18006 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18007 format %{ "vcvtps2ph $mem,$src" %}
18008 ins_encode %{
18009 int vlen_enc = vector_length_encoding(this, $src);
18010 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18011 %}
18012 ins_pipe( pipe_slow );
18013 %}
18014
18015 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18016 match(Set dst (ConvHF2F src));
18017 format %{ "vcvtph2ps $dst,$src" %}
18018 ins_encode %{
18019 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18020 %}
18021 ins_pipe( pipe_slow );
18022 %}
18023
18024 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18025 match(Set dst (VectorCastHF2F (LoadVector mem)));
18026 format %{ "vcvtph2ps $dst,$mem" %}
18027 ins_encode %{
18028 int vlen_enc = vector_length_encoding(this);
18029 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18030 %}
18031 ins_pipe( pipe_slow );
18032 %}
18033
18034 instruct vconvHF2F(vec dst, vec src) %{
18035 match(Set dst (VectorCastHF2F src));
18036 ins_cost(125);
18037 format %{ "vector_conv_HF2F $dst,$src" %}
18038 ins_encode %{
18039 int vlen_enc = vector_length_encoding(this);
18040 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18041 %}
18042 ins_pipe( pipe_slow );
18043 %}
18044
18045 // ---------------------------------------- VectorReinterpret ------------------------------------
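// VectorReinterpret is a bitwise cast. When the source and destination are
// the same size it is a no-op; widening must zero the extra high-order bytes
// (hence the mask/AND and zero-extending moves below), and narrowing simply
// keeps the low-order part. Mask (kReg) reinterprets bounce through a vector
// register because opmask registers cannot be resized directly.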
18046 instruct reinterpret_mask(kReg dst) %{
18047 predicate(n->bottom_type()->isa_vectmask() &&
18048 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18049 match(Set dst (VectorReinterpret dst));
18050 ins_cost(125);
18051 format %{ "vector_reinterpret $dst\t!" %}
18052 ins_encode %{
18053 // empty
18054 %}
18055 ins_pipe( pipe_slow );
18056 %}
18057
18058 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18059 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18060 n->bottom_type()->isa_vectmask() &&
18061 n->in(1)->bottom_type()->isa_vectmask() &&
18062 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same size in bytes
18064 match(Set dst (VectorReinterpret src));
18065 effect(TEMP xtmp);
18066 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18067 ins_encode %{
18068 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18069 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18070 assert(src_sz == dst_sz , "src and dst size mismatch");
18071 int vlen_enc = vector_length_encoding(src_sz);
18072 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18073 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18074 %}
18075 ins_pipe( pipe_slow );
18076 %}
18077
18078 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18079 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18080 n->bottom_type()->isa_vectmask() &&
18081 n->in(1)->bottom_type()->isa_vectmask() &&
18082 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18083 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same size in bytes
18085 match(Set dst (VectorReinterpret src));
18086 effect(TEMP xtmp);
18087 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18088 ins_encode %{
18089 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18090 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18091 assert(src_sz == dst_sz , "src and dst size mismatch");
18092 int vlen_enc = vector_length_encoding(src_sz);
18093 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18094 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18095 %}
18096 ins_pipe( pipe_slow );
18097 %}
18098
18099 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18100 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18101 n->bottom_type()->isa_vectmask() &&
18102 n->in(1)->bottom_type()->isa_vectmask() &&
18103 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18104 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same size in bytes
18106 match(Set dst (VectorReinterpret src));
18107 effect(TEMP xtmp);
18108 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18109 ins_encode %{
18110 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18111 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18112 assert(src_sz == dst_sz , "src and dst size mismatch");
18113 int vlen_enc = vector_length_encoding(src_sz);
18114 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18115 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18116 %}
18117 ins_pipe( pipe_slow );
18118 %}
18119
18120 instruct reinterpret(vec dst) %{
18121 predicate(!n->bottom_type()->isa_vectmask() &&
18122 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18123 match(Set dst (VectorReinterpret dst));
18124 ins_cost(125);
18125 format %{ "vector_reinterpret $dst\t!" %}
18126 ins_encode %{
18127 // empty
18128 %}
18129 ins_pipe( pipe_slow );
18130 %}
18131
18132 instruct reinterpret_expand(vec dst, vec src) %{
18133 predicate(UseAVX == 0 &&
18134 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18135 match(Set dst (VectorReinterpret src));
18136 ins_cost(125);
18137 effect(TEMP dst);
18138 format %{ "vector_reinterpret_expand $dst,$src" %}
18139 ins_encode %{
18140 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18141 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18142
18143 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18144 if (src_vlen_in_bytes == 4) {
18145 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18146 } else {
18147 assert(src_vlen_in_bytes == 8, "");
18148 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18149 }
18150 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18151 %}
18152 ins_pipe( pipe_slow );
18153 %}
18154
18155 instruct vreinterpret_expand4(legVec dst, vec src) %{
18156 predicate(UseAVX > 0 &&
18157 !n->bottom_type()->isa_vectmask() &&
18158 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18159 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18160 match(Set dst (VectorReinterpret src));
18161 ins_cost(125);
18162 format %{ "vector_reinterpret_expand $dst,$src" %}
18163 ins_encode %{
18164 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18165 %}
18166 ins_pipe( pipe_slow );
18167 %}
18168
18169
18170 instruct vreinterpret_expand(legVec dst, vec src) %{
18171 predicate(UseAVX > 0 &&
18172 !n->bottom_type()->isa_vectmask() &&
18173 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18174 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18175 match(Set dst (VectorReinterpret src));
18176 ins_cost(125);
18177 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18178 ins_encode %{
18179 switch (Matcher::vector_length_in_bytes(this, $src)) {
18180 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18181 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18182 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18183 default: ShouldNotReachHere();
18184 }
18185 %}
18186 ins_pipe( pipe_slow );
18187 %}
18188
18189 instruct reinterpret_shrink(vec dst, legVec src) %{
18190 predicate(!n->bottom_type()->isa_vectmask() &&
18191 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18192 match(Set dst (VectorReinterpret src));
18193 ins_cost(125);
18194 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18195 ins_encode %{
18196 switch (Matcher::vector_length_in_bytes(this)) {
18197 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18198 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18199 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18200 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18201 default: ShouldNotReachHere();
18202 }
18203 %}
18204 ins_pipe( pipe_slow );
18205 %}
18206
18207 // ----------------------------------------------------------------------------------------------------
18208
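// The $rmode constant is passed straight through as the roundsd/vroundpd
// immediate; the Java-level modes line up with the x86 rounding-control
// encodings (0 = rint/nearest-even, 1 = floor, 2 = ceil). The pxor below is
// only there to break the false dependency on dst's stale upper bits when
// roundsd writes just the low 64 bits of a different register.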
18209 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18210 match(Set dst (RoundDoubleMode src rmode));
18211 format %{ "roundsd $dst,$src" %}
18212 ins_cost(150);
18213 ins_encode %{
18214 assert(UseSSE >= 4, "required");
18215 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18216 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18217 }
18218 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18219 %}
18220 ins_pipe(pipe_slow);
18221 %}
18222
18223 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18224 match(Set dst (RoundDoubleMode con rmode));
18225 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18226 ins_cost(150);
18227 ins_encode %{
18228 assert(UseSSE >= 4, "required");
18229 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18230 %}
18231 ins_pipe(pipe_slow);
18232 %}
18233
18234 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18235 predicate(Matcher::vector_length(n) < 8);
18236 match(Set dst (RoundDoubleModeV src rmode));
18237 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18238 ins_encode %{
18239 assert(UseAVX > 0, "required");
18240 int vlen_enc = vector_length_encoding(this);
18241 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18242 %}
18243 ins_pipe( pipe_slow );
18244 %}
18245
18246 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18247 predicate(Matcher::vector_length(n) == 8);
18248 match(Set dst (RoundDoubleModeV src rmode));
18249 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18250 ins_encode %{
18251 assert(UseAVX > 2, "required");
18252 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18253 %}
18254 ins_pipe( pipe_slow );
18255 %}
18256
18257 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18258 predicate(Matcher::vector_length(n) < 8);
18259 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18260 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18261 ins_encode %{
18262 assert(UseAVX > 0, "required");
18263 int vlen_enc = vector_length_encoding(this);
18264 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18265 %}
18266 ins_pipe( pipe_slow );
18267 %}
18268
18269 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18270 predicate(Matcher::vector_length(n) == 8);
18271 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18272 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18273 ins_encode %{
18274 assert(UseAVX > 2, "required");
18275 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18276 %}
18277 ins_pipe( pipe_slow );
18278 %}
18279
18280 instruct onspinwait() %{
18281 match(OnSpinWait);
18282 ins_cost(200);
18283
18284 format %{
18285 $$template
18286 $$emit$$"pause\t! membar_onspinwait"
18287 %}
18288 ins_encode %{
18289 __ pause();
18290 %}
18291 ins_pipe(pipe_slow);
18292 %}
18293
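// FMA computes a * b + c with a single rounding step (IEEE 754
// fusedMultiplyAdd), which is what Math.fma requires; it is not equivalent
// to a separate multiply followed by an add.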
18294 // a * b + c
18295 instruct fmaD_reg(regD a, regD b, regD c) %{
18296 match(Set c (FmaD c (Binary a b)));
18297 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18298 ins_cost(150);
18299 ins_encode %{
18300 assert(UseFMA, "Needs FMA instructions support.");
18301 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18302 %}
18303 ins_pipe( pipe_slow );
18304 %}
18305
18306 // a * b + c
18307 instruct fmaF_reg(regF a, regF b, regF c) %{
18308 match(Set c (FmaF c (Binary a b)));
18309 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18310 ins_cost(150);
18311 ins_encode %{
18312 assert(UseFMA, "Needs FMA instructions support.");
18313 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18314 %}
18315 ins_pipe( pipe_slow );
18316 %}
18317
18318 // ====================VECTOR INSTRUCTIONS=====================================
18319
18320 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18321 instruct MoveVec2Leg(legVec dst, vec src) %{
18322 match(Set dst src);
18323 format %{ "" %}
18324 ins_encode %{
18325 ShouldNotReachHere();
18326 %}
18327 ins_pipe( fpu_reg_reg );
18328 %}
18329
18330 instruct MoveLeg2Vec(vec dst, legVec src) %{
18331 match(Set dst src);
18332 format %{ "" %}
18333 ins_encode %{
18334 ShouldNotReachHere();
18335 %}
18336 ins_pipe( fpu_reg_reg );
18337 %}
18338
18339 // ============================================================================
18340
18341 // Load vectors generic operand pattern
18342 instruct loadV(vec dst, memory mem) %{
18343 match(Set dst (LoadVector mem));
18344 ins_cost(125);
18345 format %{ "load_vector $dst,$mem" %}
18346 ins_encode %{
18347 BasicType bt = Matcher::vector_element_basic_type(this);
18348 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18349 %}
18350 ins_pipe( pipe_slow );
18351 %}
18352
18353 // Store vectors generic operand pattern.
18354 instruct storeV(memory mem, vec src) %{
18355 match(Set mem (StoreVector mem src));
18356 ins_cost(145);
  format %{ "store_vector $mem,$src" %}
18358 ins_encode %{
18359 switch (Matcher::vector_length_in_bytes(this, $src)) {
18360 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18361 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18362 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18363 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18364 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18365 default: ShouldNotReachHere();
18366 }
18367 %}
18368 ins_pipe( pipe_slow );
18369 %}
18370
18371 // ---------------------------------------- Gather ------------------------------------
18372
18373 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
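// A gather loads one element per enabled lane through a vector of indices,
// roughly:
//   for (int i = 0; i < vlen; i++) {
//     if (mask[i]) dst[i] = base[idx[i]];
//   }
// The hardware clears each mask bit as its lane completes, which is why the
// rules below materialize an all-ones mask (or a copy of the user mask) into
// a scratch register before gathering.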
18374
18375 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18376 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18377 Matcher::vector_length_in_bytes(n) <= 32);
18378 match(Set dst (LoadVectorGather mem idx));
18379 effect(TEMP dst, TEMP tmp, TEMP mask);
18380 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18381 ins_encode %{
18382 int vlen_enc = vector_length_encoding(this);
18383 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18384 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18385 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18386 __ lea($tmp$$Register, $mem$$Address);
18387 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18388 %}
18389 ins_pipe( pipe_slow );
18390 %}
18391
18392
18393 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18394 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18395 !is_subword_type(Matcher::vector_element_basic_type(n)));
18396 match(Set dst (LoadVectorGather mem idx));
18397 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18399 ins_encode %{
18400 int vlen_enc = vector_length_encoding(this);
18401 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18402 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18403 __ lea($tmp$$Register, $mem$$Address);
18404 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18405 %}
18406 ins_pipe( pipe_slow );
18407 %}
18408
18409 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18410 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18411 !is_subword_type(Matcher::vector_element_basic_type(n)));
18412 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18413 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18415 ins_encode %{
18416 assert(UseAVX > 2, "sanity");
18417 int vlen_enc = vector_length_encoding(this);
18418 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18419 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is moved to a temporary.
18422 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18423 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18424 __ lea($tmp$$Register, $mem$$Address);
18425 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18426 %}
18427 ins_pipe( pipe_slow );
18428 %}
18429
18430 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18431 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18432 match(Set dst (LoadVectorGather mem idx_base));
18433 effect(TEMP tmp, TEMP rtmp);
18434 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18435 ins_encode %{
18436 int vlen_enc = vector_length_encoding(this);
18437 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18438 __ lea($tmp$$Register, $mem$$Address);
18439 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18440 %}
18441 ins_pipe( pipe_slow );
18442 %}
18443
18444 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18445 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18446 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18447 match(Set dst (LoadVectorGather mem idx_base));
18448 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18449 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18450 ins_encode %{
18451 int vlen_enc = vector_length_encoding(this);
18452 int vector_len = Matcher::vector_length(this);
18453 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18454 __ lea($tmp$$Register, $mem$$Address);
18455 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18456 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18457 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18458 %}
18459 ins_pipe( pipe_slow );
18460 %}
18461
18462 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18463 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18464 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18465 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18466 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18467 ins_encode %{
18468 int vlen_enc = vector_length_encoding(this);
18469 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18470 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18471 __ lea($tmp$$Register, $mem$$Address);
18472 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18473 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18474 %}
18475 ins_pipe( pipe_slow );
18476 %}
18477
18478 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18479 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18480 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18481 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18482 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18483 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18484 ins_encode %{
18485 int vlen_enc = vector_length_encoding(this);
18486 int vector_len = Matcher::vector_length(this);
18487 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18488 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18489 __ lea($tmp$$Register, $mem$$Address);
18490 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18491 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18492 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18493 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18494 %}
18495 ins_pipe( pipe_slow );
18496 %}
18497
18498 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18499 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18500 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18501 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18502 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18503 ins_encode %{
18504 int vlen_enc = vector_length_encoding(this);
18505 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18506 __ lea($tmp$$Register, $mem$$Address);
18507 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18508 if (elem_bt == T_SHORT) {
18509 __ movl($mask_idx$$Register, 0x55555555);
18510 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18511 }
18512 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18513 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18514 %}
18515 ins_pipe( pipe_slow );
18516 %}
18517
18518 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18519 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18520 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18521 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18522 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18523 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18524 ins_encode %{
18525 int vlen_enc = vector_length_encoding(this);
18526 int vector_len = Matcher::vector_length(this);
18527 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18528 __ lea($tmp$$Register, $mem$$Address);
18529 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18530 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18531 if (elem_bt == T_SHORT) {
18532 __ movl($mask_idx$$Register, 0x55555555);
18533 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18534 }
18535 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18536 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18537 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18538 %}
18539 ins_pipe( pipe_slow );
18540 %}
18541
18542 // ====================Scatter=======================================
18543
18544 // Scatter INT, LONG, FLOAT, DOUBLE
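// A scatter is the store-side dual of a gather: base[idx[i]] = src[i] for
// each enabled lane, with the opmask bit for a lane cleared once its store
// completes.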
18545
18546 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18547 predicate(UseAVX > 2);
18548 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18549 effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18551 ins_encode %{
18552 int vlen_enc = vector_length_encoding(this, $src);
18553 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18554
18555 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18556 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18557
18558 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18559 __ lea($tmp$$Register, $mem$$Address);
18560 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18561 %}
18562 ins_pipe( pipe_slow );
18563 %}
18564
18565 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18566 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18567 effect(TEMP tmp, TEMP ktmp);
18568 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18569 ins_encode %{
18570 int vlen_enc = vector_length_encoding(this, $src);
18571 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18572 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18573 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is moved to a temporary.
18576 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18577 __ lea($tmp$$Register, $mem$$Address);
18578 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18579 %}
18580 ins_pipe( pipe_slow );
18581 %}
18582
18583 // ====================REPLICATE=======================================
18584
18585 // Replicate byte scalar to be vector
18586 instruct vReplB_reg(vec dst, rRegI src) %{
18587 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18588 match(Set dst (Replicate src));
18589 format %{ "replicateB $dst,$src" %}
18590 ins_encode %{
18591 uint vlen = Matcher::vector_length(this);
18592 if (UseAVX >= 2) {
18593 int vlen_enc = vector_length_encoding(this);
18594 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18595 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18596 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18597 } else {
18598 __ movdl($dst$$XMMRegister, $src$$Register);
18599 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18600 }
18601 } else {
18602 assert(UseAVX < 2, "");
18603 __ movdl($dst$$XMMRegister, $src$$Register);
18604 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18605 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18606 if (vlen >= 16) {
18607 assert(vlen == 16, "");
18608 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18609 }
18610 }
18611 %}
18612 ins_pipe( pipe_slow );
18613 %}
18614
18615 instruct ReplB_mem(vec dst, memory mem) %{
18616 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18617 match(Set dst (Replicate (LoadB mem)));
18618 format %{ "replicateB $dst,$mem" %}
18619 ins_encode %{
18620 int vlen_enc = vector_length_encoding(this);
18621 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18622 %}
18623 ins_pipe( pipe_slow );
18624 %}
18625
18626 // ====================ReplicateS=======================================
18627
18628 instruct vReplS_reg(vec dst, rRegI src) %{
18629 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18630 match(Set dst (Replicate src));
18631 format %{ "replicateS $dst,$src" %}
18632 ins_encode %{
18633 uint vlen = Matcher::vector_length(this);
18634 int vlen_enc = vector_length_encoding(this);
18635 if (UseAVX >= 2) {
18636 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18637 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18638 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18639 } else {
18640 __ movdl($dst$$XMMRegister, $src$$Register);
18641 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18642 }
18643 } else {
18644 assert(UseAVX < 2, "");
18645 __ movdl($dst$$XMMRegister, $src$$Register);
18646 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18647 if (vlen >= 8) {
18648 assert(vlen == 8, "");
18649 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18650 }
18651 }
18652 %}
18653 ins_pipe( pipe_slow );
18654 %}
18655
18656 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18657 match(Set dst (Replicate con));
18658 effect(TEMP rtmp);
18659 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18660 ins_encode %{
18661 int vlen_enc = vector_length_encoding(this);
18662 BasicType bt = Matcher::vector_element_basic_type(this);
18663 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18664 __ movl($rtmp$$Register, $con$$constant);
18665 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18666 %}
18667 ins_pipe( pipe_slow );
18668 %}
18669
18670 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18671 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18672 match(Set dst (Replicate src));
18673 effect(TEMP rtmp);
18674 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18675 ins_encode %{
18676 int vlen_enc = vector_length_encoding(this);
18677 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18678 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18679 %}
18680 ins_pipe( pipe_slow );
18681 %}
18682
18683 instruct ReplS_mem(vec dst, memory mem) %{
18684 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18685 match(Set dst (Replicate (LoadS mem)));
18686 format %{ "replicateS $dst,$mem" %}
18687 ins_encode %{
18688 int vlen_enc = vector_length_encoding(this);
18689 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18690 %}
18691 ins_pipe( pipe_slow );
18692 %}
18693
18694 // ====================ReplicateI=======================================
18695
18696 instruct ReplI_reg(vec dst, rRegI src) %{
18697 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18698 match(Set dst (Replicate src));
18699 format %{ "replicateI $dst,$src" %}
18700 ins_encode %{
18701 uint vlen = Matcher::vector_length(this);
18702 int vlen_enc = vector_length_encoding(this);
18703 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18704 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18705 } else if (VM_Version::supports_avx2()) {
18706 __ movdl($dst$$XMMRegister, $src$$Register);
18707 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18708 } else {
18709 __ movdl($dst$$XMMRegister, $src$$Register);
18710 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18711 }
18712 %}
18713 ins_pipe( pipe_slow );
18714 %}
18715
18716 instruct ReplI_mem(vec dst, memory mem) %{
18717 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18718 match(Set dst (Replicate (LoadI mem)));
18719 format %{ "replicateI $dst,$mem" %}
18720 ins_encode %{
18721 int vlen_enc = vector_length_encoding(this);
18722 if (VM_Version::supports_avx2()) {
18723 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18724 } else if (VM_Version::supports_avx()) {
18725 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18726 } else {
18727 __ movdl($dst$$XMMRegister, $mem$$Address);
18728 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18729 }
18730 %}
18731 ins_pipe( pipe_slow );
18732 %}
18733
18734 instruct ReplI_imm(vec dst, immI con) %{
18735 predicate(Matcher::is_non_long_integral_vector(n));
18736 match(Set dst (Replicate con));
18737 format %{ "replicateI $dst,$con" %}
18738 ins_encode %{
18739 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18740 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18741 type2aelembytes(Matcher::vector_element_basic_type(this))));
18742 BasicType bt = Matcher::vector_element_basic_type(this);
18743 int vlen = Matcher::vector_length_in_bytes(this);
18744 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18745 %}
18746 ins_pipe( pipe_slow );
18747 %}
18748
18749 // Replicate scalar zero to be vector
18750 instruct ReplI_zero(vec dst, immI_0 zero) %{
18751 predicate(Matcher::is_non_long_integral_vector(n));
18752 match(Set dst (Replicate zero));
18753 format %{ "replicateI $dst,$zero" %}
18754 ins_encode %{
18755 int vlen_enc = vector_length_encoding(this);
18756 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18757 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18758 } else {
18759 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18760 }
18761 %}
18762 ins_pipe( fpu_reg_reg );
18763 %}
18764
18765 instruct ReplI_M1(vec dst, immI_M1 con) %{
18766 predicate(Matcher::is_non_long_integral_vector(n));
18767 match(Set dst (Replicate con));
18768 format %{ "vallones $dst" %}
18769 ins_encode %{
18770 int vector_len = vector_length_encoding(this);
18771 __ vallones($dst$$XMMRegister, vector_len);
18772 %}
18773 ins_pipe( pipe_slow );
18774 %}
18775
18776 // ====================ReplicateL=======================================
18777
18778 // Replicate long (8 byte) scalar to be vector
18779 instruct ReplL_reg(vec dst, rRegL src) %{
18780 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18781 match(Set dst (Replicate src));
18782 format %{ "replicateL $dst,$src" %}
18783 ins_encode %{
18784 int vlen = Matcher::vector_length(this);
18785 int vlen_enc = vector_length_encoding(this);
18786 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18787 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18788 } else if (VM_Version::supports_avx2()) {
18789 __ movdq($dst$$XMMRegister, $src$$Register);
18790 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18791 } else {
18792 __ movdq($dst$$XMMRegister, $src$$Register);
18793 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18794 }
18795 %}
18796 ins_pipe( pipe_slow );
18797 %}
18798
18799 instruct ReplL_mem(vec dst, memory mem) %{
18800 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18801 match(Set dst (Replicate (LoadL mem)));
18802 format %{ "replicateL $dst,$mem" %}
18803 ins_encode %{
18804 int vlen_enc = vector_length_encoding(this);
18805 if (VM_Version::supports_avx2()) {
18806 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18807 } else if (VM_Version::supports_sse3()) {
18808 __ movddup($dst$$XMMRegister, $mem$$Address);
18809 } else {
18810 __ movq($dst$$XMMRegister, $mem$$Address);
18811 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18812 }
18813 %}
18814 ins_pipe( pipe_slow );
18815 %}
18816
18817 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18818 instruct ReplL_imm(vec dst, immL con) %{
18819 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18820 match(Set dst (Replicate con));
18821 format %{ "replicateL $dst,$con" %}
18822 ins_encode %{
18823 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18824 int vlen = Matcher::vector_length_in_bytes(this);
18825 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18826 %}
18827 ins_pipe( pipe_slow );
18828 %}
18829
18830 instruct ReplL_zero(vec dst, immL0 zero) %{
18831 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18832 match(Set dst (Replicate zero));
18833 format %{ "replicateL $dst,$zero" %}
18834 ins_encode %{
18835 int vlen_enc = vector_length_encoding(this);
18836 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18837 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18838 } else {
18839 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18840 }
18841 %}
18842 ins_pipe( fpu_reg_reg );
18843 %}
18844
18845 instruct ReplL_M1(vec dst, immL_M1 con) %{
18846 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18847 match(Set dst (Replicate con));
18848 format %{ "vallones $dst" %}
18849 ins_encode %{
18850 int vector_len = vector_length_encoding(this);
18851 __ vallones($dst$$XMMRegister, vector_len);
18852 %}
18853 ins_pipe( pipe_slow );
18854 %}
18855
18856 // ====================ReplicateF=======================================
18857
18858 instruct vReplF_reg(vec dst, vlRegF src) %{
18859 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18860 match(Set dst (Replicate src));
18861 format %{ "replicateF $dst,$src" %}
18862 ins_encode %{
18863 uint vlen = Matcher::vector_length(this);
18864 int vlen_enc = vector_length_encoding(this);
18865 if (vlen <= 4) {
18866 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18867 } else if (VM_Version::supports_avx2()) {
18868 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18869 } else {
18870 assert(vlen == 8, "sanity");
18871 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18872 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18873 }
18874 %}
18875 ins_pipe( pipe_slow );
18876 %}
18877
18878 instruct ReplF_reg(vec dst, vlRegF src) %{
18879 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18880 match(Set dst (Replicate src));
18881 format %{ "replicateF $dst,$src" %}
18882 ins_encode %{
18883 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18884 %}
18885 ins_pipe( pipe_slow );
18886 %}
18887
18888 instruct ReplF_mem(vec dst, memory mem) %{
18889 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18890 match(Set dst (Replicate (LoadF mem)));
18891 format %{ "replicateF $dst,$mem" %}
18892 ins_encode %{
18893 int vlen_enc = vector_length_encoding(this);
18894 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18895 %}
18896 ins_pipe( pipe_slow );
18897 %}
18898
18899 // Replicate float scalar immediate to be vector by loading from const table.
18900 instruct ReplF_imm(vec dst, immF con) %{
18901 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18902 match(Set dst (Replicate con));
18903 format %{ "replicateF $dst,$con" %}
18904 ins_encode %{
18905 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18906 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
18907 int vlen = Matcher::vector_length_in_bytes(this);
18908 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18909 %}
18910 ins_pipe( pipe_slow );
18911 %}
18912
18913 instruct ReplF_zero(vec dst, immF0 zero) %{
18914 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18915 match(Set dst (Replicate zero));
18916 format %{ "replicateF $dst,$zero" %}
18917 ins_encode %{
18918 int vlen_enc = vector_length_encoding(this);
18919 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18920 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18921 } else {
18922 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18923 }
18924 %}
18925 ins_pipe( fpu_reg_reg );
18926 %}
18927
18928 // ====================ReplicateD=======================================
18929
18930 // Replicate double (8 bytes) scalar to be vector
18931 instruct vReplD_reg(vec dst, vlRegD src) %{
18932 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18933 match(Set dst (Replicate src));
18934 format %{ "replicateD $dst,$src" %}
18935 ins_encode %{
18936 uint vlen = Matcher::vector_length(this);
18937 int vlen_enc = vector_length_encoding(this);
18938 if (vlen <= 2) {
18939 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18940 } else if (VM_Version::supports_avx2()) {
18941 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
18942 } else {
18943 assert(vlen == 4, "sanity");
18944 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18945 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18946 }
18947 %}
18948 ins_pipe( pipe_slow );
18949 %}
18950
18951 instruct ReplD_reg(vec dst, vlRegD src) %{
18952 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18953 match(Set dst (Replicate src));
18954 format %{ "replicateD $dst,$src" %}
18955 ins_encode %{
18956 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18957 %}
18958 ins_pipe( pipe_slow );
18959 %}
18960
18961 instruct ReplD_mem(vec dst, memory mem) %{
18962 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18963 match(Set dst (Replicate (LoadD mem)));
18964 format %{ "replicateD $dst,$mem" %}
18965 ins_encode %{
18966 if (Matcher::vector_length(this) >= 4) {
18967 int vlen_enc = vector_length_encoding(this);
18968 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18969 } else {
18970 __ movddup($dst$$XMMRegister, $mem$$Address);
18971 }
18972 %}
18973 ins_pipe( pipe_slow );
18974 %}
18975
18976 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
18977 instruct ReplD_imm(vec dst, immD con) %{
18978 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18979 match(Set dst (Replicate con));
18980 format %{ "replicateD $dst,$con" %}
18981 ins_encode %{
18982 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18983 int vlen = Matcher::vector_length_in_bytes(this);
18984 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18985 %}
18986 ins_pipe( pipe_slow );
18987 %}
18988
18989 instruct ReplD_zero(vec dst, immD0 zero) %{
18990 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18991 match(Set dst (Replicate zero));
18992 format %{ "replicateD $dst,$zero" %}
18993 ins_encode %{
18994 int vlen_enc = vector_length_encoding(this);
18995 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
18996 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18997 } else {
18998 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18999 }
19000 %}
19001 ins_pipe( fpu_reg_reg );
19002 %}
19003
19004 // ====================VECTOR INSERT=======================================
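
// VectorInsert replaces a single lane of a vector with a scalar value, leaving
// all other lanes unchanged. Illustrative Vector API sketch only:
//
//   import jdk.incubator.vector.IntVector;
//
//   static IntVector setLane3(IntVector v, int val) {
//     return v.withLane(3, val);   // VectorInsert (Binary v val) with idx=3
//   }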
19005
19006 instruct insert(vec dst, rRegI val, immU8 idx) %{
19007 predicate(Matcher::vector_length_in_bytes(n) < 32);
19008 match(Set dst (VectorInsert (Binary dst val) idx));
19009 format %{ "vector_insert $dst,$val,$idx" %}
19010 ins_encode %{
19011 assert(UseSSE >= 4, "required");
19012 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19013
19014 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19015
19016 assert(is_integral_type(elem_bt), "");
19017 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19018
19019 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19020 %}
19021 ins_pipe( pipe_slow );
19022 %}
19023
19024 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19025 predicate(Matcher::vector_length_in_bytes(n) == 32);
19026 match(Set dst (VectorInsert (Binary src val) idx));
19027 effect(TEMP vtmp);
19028 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19029 ins_encode %{
19030 int vlen_enc = Assembler::AVX_256bit;
19031 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19032 int elem_per_lane = 16/type2aelembytes(elem_bt);
19033 int log2epr = log2(elem_per_lane);
19034
19035 assert(is_integral_type(elem_bt), "sanity");
19036 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
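    // Split the constant lane index: x_idx selects the element within a single
    // 128-bit lane and y_idx selects the 128-bit lane itself. That lane is
    // extracted, patched with $val, and reinserted; the wider insert rules
    // below use the same decomposition.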
19037
19038 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19039 uint y_idx = ($idx$$constant >> log2epr) & 1;
19040 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19041 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19042 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19043 %}
19044 ins_pipe( pipe_slow );
19045 %}
19046
19047 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19048 predicate(Matcher::vector_length_in_bytes(n) == 64);
19049 match(Set dst (VectorInsert (Binary src val) idx));
19050 effect(TEMP vtmp);
19051 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19052 ins_encode %{
19053 assert(UseAVX > 2, "sanity");
19054
19055 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19056 int elem_per_lane = 16/type2aelembytes(elem_bt);
19057 int log2epr = log2(elem_per_lane);
19058
19059 assert(is_integral_type(elem_bt), "");
19060 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19061
19062 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19063 uint y_idx = ($idx$$constant >> log2epr) & 3;
19064 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19065 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19066 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19067 %}
19068 ins_pipe( pipe_slow );
19069 %}
19070
19071 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19072 predicate(Matcher::vector_length(n) == 2);
19073 match(Set dst (VectorInsert (Binary dst val) idx));
19074 format %{ "vector_insert $dst,$val,$idx" %}
19075 ins_encode %{
19076 assert(UseSSE >= 4, "required");
19077 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19078 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19079
19080 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19081 %}
19082 ins_pipe( pipe_slow );
19083 %}
19084
19085 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19086 predicate(Matcher::vector_length(n) == 4);
19087 match(Set dst (VectorInsert (Binary src val) idx));
19088 effect(TEMP vtmp);
19089 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19090 ins_encode %{
19091 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19092 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19093
19094 uint x_idx = $idx$$constant & right_n_bits(1);
19095 uint y_idx = ($idx$$constant >> 1) & 1;
19096 int vlen_enc = Assembler::AVX_256bit;
19097 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19098 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19099 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19100 %}
19101 ins_pipe( pipe_slow );
19102 %}
19103
19104 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19105 predicate(Matcher::vector_length(n) == 8);
19106 match(Set dst (VectorInsert (Binary src val) idx));
19107 effect(TEMP vtmp);
19108 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19109 ins_encode %{
19110 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19111 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19112
19113 uint x_idx = $idx$$constant & right_n_bits(1);
19114 uint y_idx = ($idx$$constant >> 1) & 3;
19115 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19116 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19117 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19118 %}
19119 ins_pipe( pipe_slow );
19120 %}
19121
19122 instruct insertF(vec dst, regF val, immU8 idx) %{
19123 predicate(Matcher::vector_length(n) < 8);
19124 match(Set dst (VectorInsert (Binary dst val) idx));
19125 format %{ "vector_insert $dst,$val,$idx" %}
19126 ins_encode %{
19127 assert(UseSSE >= 4, "sanity");
19128
19129 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19130 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19131
19132 uint x_idx = $idx$$constant & right_n_bits(2);
19133 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19134 %}
19135 ins_pipe( pipe_slow );
19136 %}
19137
19138 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19139 predicate(Matcher::vector_length(n) >= 8);
19140 match(Set dst (VectorInsert (Binary src val) idx));
19141 effect(TEMP vtmp);
19142 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19143 ins_encode %{
19144 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19145 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19146
19147 int vlen = Matcher::vector_length(this);
19148 uint x_idx = $idx$$constant & right_n_bits(2);
19149 if (vlen == 8) {
19150 uint y_idx = ($idx$$constant >> 2) & 1;
19151 int vlen_enc = Assembler::AVX_256bit;
19152 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19153 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19154 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19155 } else {
19156 assert(vlen == 16, "sanity");
19157 uint y_idx = ($idx$$constant >> 2) & 3;
19158 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19159 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19160 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19161 }
19162 %}
19163 ins_pipe( pipe_slow );
19164 %}
19165
19166 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19167 predicate(Matcher::vector_length(n) == 2);
19168 match(Set dst (VectorInsert (Binary dst val) idx));
19169 effect(TEMP tmp);
19170 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19171 ins_encode %{
19172 assert(UseSSE >= 4, "sanity");
19173 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19174 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19175
19176 __ movq($tmp$$Register, $val$$XMMRegister);
19177 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19178 %}
19179 ins_pipe( pipe_slow );
19180 %}
19181
19182 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19183 predicate(Matcher::vector_length(n) == 4);
19184 match(Set dst (VectorInsert (Binary src val) idx));
19185 effect(TEMP vtmp, TEMP tmp);
19186 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19187 ins_encode %{
19188 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19189 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19190
19191 uint x_idx = $idx$$constant & right_n_bits(1);
19192 uint y_idx = ($idx$$constant >> 1) & 1;
19193 int vlen_enc = Assembler::AVX_256bit;
19194 __ movq($tmp$$Register, $val$$XMMRegister);
19195 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19196 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19197 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19198 %}
19199 ins_pipe( pipe_slow );
19200 %}
19201
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19203 predicate(Matcher::vector_length(n) == 8);
19204 match(Set dst (VectorInsert (Binary src val) idx));
19205 effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19207 ins_encode %{
19208 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19209 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19210
19211 uint x_idx = $idx$$constant & right_n_bits(1);
19212 uint y_idx = ($idx$$constant >> 1) & 3;
19213 __ movq($tmp$$Register, $val$$XMMRegister);
19214 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19215 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19216 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19217 %}
19218 ins_pipe( pipe_slow );
19219 %}
19220
19221 // ====================REDUCTION ARITHMETIC=======================================
19222
19223 // =======================Int Reduction==========================================
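
// Reduction rules fold all lanes of the vector input (src2) into a scalar and
// combine the result with the scalar input (src1). Illustrative Vector API
// sketch only:
//
//   import jdk.incubator.vector.IntVector;
//   import jdk.incubator.vector.VectorOperators;
//
//   static int sum(IntVector v) {
//     return v.reduceLanes(VectorOperators.ADD);   // AddReductionVI
//   }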
19224
19225 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19226 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19227 match(Set dst (AddReductionVI src1 src2));
19228 match(Set dst (MulReductionVI src1 src2));
19229 match(Set dst (AndReductionV src1 src2));
19230 match(Set dst ( OrReductionV src1 src2));
19231 match(Set dst (XorReductionV src1 src2));
19232 match(Set dst (MinReductionV src1 src2));
19233 match(Set dst (MaxReductionV src1 src2));
19234 effect(TEMP vtmp1, TEMP vtmp2);
19235 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19236 ins_encode %{
19237 int opcode = this->ideal_Opcode();
19238 int vlen = Matcher::vector_length(this, $src2);
19239 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19240 %}
19241 ins_pipe( pipe_slow );
19242 %}
19243
19244 // =======================Long Reduction==========================================
19245
19246 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19247 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19248 match(Set dst (AddReductionVL src1 src2));
19249 match(Set dst (MulReductionVL src1 src2));
19250 match(Set dst (AndReductionV src1 src2));
19251 match(Set dst ( OrReductionV src1 src2));
19252 match(Set dst (XorReductionV src1 src2));
19253 match(Set dst (MinReductionV src1 src2));
19254 match(Set dst (MaxReductionV src1 src2));
19255 effect(TEMP vtmp1, TEMP vtmp2);
19256 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19257 ins_encode %{
19258 int opcode = this->ideal_Opcode();
19259 int vlen = Matcher::vector_length(this, $src2);
19260 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19261 %}
19262 ins_pipe( pipe_slow );
19263 %}
19264
19265 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19266 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19267 match(Set dst (AddReductionVL src1 src2));
19268 match(Set dst (MulReductionVL src1 src2));
19269 match(Set dst (AndReductionV src1 src2));
19270 match(Set dst ( OrReductionV src1 src2));
19271 match(Set dst (XorReductionV src1 src2));
19272 match(Set dst (MinReductionV src1 src2));
19273 match(Set dst (MaxReductionV src1 src2));
19274 effect(TEMP vtmp1, TEMP vtmp2);
19275 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19276 ins_encode %{
19277 int opcode = this->ideal_Opcode();
19278 int vlen = Matcher::vector_length(this, $src2);
19279 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19280 %}
19281 ins_pipe( pipe_slow );
19282 %}
19283
19284 // =======================Float Reduction==========================================
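
// The strict-order rules below preserve the left-to-right evaluation order
// that an auto-vectorized scalar loop requires, because floating-point add and
// mul are not associative:
//
//   static float sum(float[] a) {
//     float s = 0.0f;
//     for (int i = 0; i < a.length; i++) {
//       s += a[i];   // reassociating this sum could change the result
//     }
//     return s;
//   }
//
// The unordered_reduction* rules further down instead serve Vector API
// reduceLanes calls, which permit a reassociated (tree-shaped) reduction.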
19285
19286 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19287 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19288 match(Set dst (AddReductionVF dst src));
19289 match(Set dst (MulReductionVF dst src));
19290 effect(TEMP dst, TEMP vtmp);
19291 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19292 ins_encode %{
19293 int opcode = this->ideal_Opcode();
19294 int vlen = Matcher::vector_length(this, $src);
19295 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19296 %}
19297 ins_pipe( pipe_slow );
19298 %}
19299
19300 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19301 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19302 match(Set dst (AddReductionVF dst src));
19303 match(Set dst (MulReductionVF dst src));
19304 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19305 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19306 ins_encode %{
19307 int opcode = this->ideal_Opcode();
19308 int vlen = Matcher::vector_length(this, $src);
19309 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19310 %}
19311 ins_pipe( pipe_slow );
19312 %}
19313
19314 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19315 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19316 match(Set dst (AddReductionVF dst src));
19317 match(Set dst (MulReductionVF dst src));
19318 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19319 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19320 ins_encode %{
19321 int opcode = this->ideal_Opcode();
19322 int vlen = Matcher::vector_length(this, $src);
19323 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19324 %}
19325 ins_pipe( pipe_slow );
19326 %}
19327
19328
19329 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19330 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19331 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19332 // src1 contains reduction identity
19333 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19334 match(Set dst (AddReductionVF src1 src2));
19335 match(Set dst (MulReductionVF src1 src2));
19336 effect(TEMP dst);
19337 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19338 ins_encode %{
19339 int opcode = this->ideal_Opcode();
19340 int vlen = Matcher::vector_length(this, $src2);
19341 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19342 %}
19343 ins_pipe( pipe_slow );
19344 %}
19345
19346 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19347 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19348 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19349 // src1 contains reduction identity
19350 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19351 match(Set dst (AddReductionVF src1 src2));
19352 match(Set dst (MulReductionVF src1 src2));
19353 effect(TEMP dst, TEMP vtmp);
19354 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19355 ins_encode %{
19356 int opcode = this->ideal_Opcode();
19357 int vlen = Matcher::vector_length(this, $src2);
19358 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19359 %}
19360 ins_pipe( pipe_slow );
19361 %}
19362
19363 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19364 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19365 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19366 // src1 contains reduction identity
19367 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19368 match(Set dst (AddReductionVF src1 src2));
19369 match(Set dst (MulReductionVF src1 src2));
19370 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19371 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19372 ins_encode %{
19373 int opcode = this->ideal_Opcode();
19374 int vlen = Matcher::vector_length(this, $src2);
19375 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19376 %}
19377 ins_pipe( pipe_slow );
19378 %}
19379
19380 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19381 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19382 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19383 // src1 contains reduction identity
19384 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19385 match(Set dst (AddReductionVF src1 src2));
19386 match(Set dst (MulReductionVF src1 src2));
19387 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19388 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19389 ins_encode %{
19390 int opcode = this->ideal_Opcode();
19391 int vlen = Matcher::vector_length(this, $src2);
19392 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19393 %}
19394 ins_pipe( pipe_slow );
19395 %}
19396
19397 // =======================Double Reduction==========================================
19398
19399 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19400 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19401 match(Set dst (AddReductionVD dst src));
19402 match(Set dst (MulReductionVD dst src));
19403 effect(TEMP dst, TEMP vtmp);
19404 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19405 ins_encode %{
19406 int opcode = this->ideal_Opcode();
19407 int vlen = Matcher::vector_length(this, $src);
19408 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19409 %}
19410 ins_pipe( pipe_slow );
19411 %}
19412
19413 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19414 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19415 match(Set dst (AddReductionVD dst src));
19416 match(Set dst (MulReductionVD dst src));
19417 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19418 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19419 ins_encode %{
19420 int opcode = this->ideal_Opcode();
19421 int vlen = Matcher::vector_length(this, $src);
19422 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19423 %}
19424 ins_pipe( pipe_slow );
19425 %}
19426
19427 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19428 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19429 match(Set dst (AddReductionVD dst src));
19430 match(Set dst (MulReductionVD dst src));
19431 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19432 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19433 ins_encode %{
19434 int opcode = this->ideal_Opcode();
19435 int vlen = Matcher::vector_length(this, $src);
19436 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19437 %}
19438 ins_pipe( pipe_slow );
19439 %}
19440
19441 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19442 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19443 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19444 // src1 contains reduction identity
19445 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19446 match(Set dst (AddReductionVD src1 src2));
19447 match(Set dst (MulReductionVD src1 src2));
19448 effect(TEMP dst);
19449 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19450 ins_encode %{
19451 int opcode = this->ideal_Opcode();
19452 int vlen = Matcher::vector_length(this, $src2);
19453 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19454 %}
19455 ins_pipe( pipe_slow );
19456 %}
19457
19458 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19459 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19460 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19461 // src1 contains reduction identity
19462 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19463 match(Set dst (AddReductionVD src1 src2));
19464 match(Set dst (MulReductionVD src1 src2));
19465 effect(TEMP dst, TEMP vtmp);
19466 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19467 ins_encode %{
19468 int opcode = this->ideal_Opcode();
19469 int vlen = Matcher::vector_length(this, $src2);
19470 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19471 %}
19472 ins_pipe( pipe_slow );
19473 %}
19474
19475 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19476 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19477 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19478 // src1 contains reduction identity
19479 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19480 match(Set dst (AddReductionVD src1 src2));
19481 match(Set dst (MulReductionVD src1 src2));
19482 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19483 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19484 ins_encode %{
19485 int opcode = this->ideal_Opcode();
19486 int vlen = Matcher::vector_length(this, $src2);
19487 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19488 %}
19489 ins_pipe( pipe_slow );
19490 %}
19491
19492 // =======================Byte Reduction==========================================
19493
19494 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19495 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19496 match(Set dst (AddReductionVI src1 src2));
19497 match(Set dst (AndReductionV src1 src2));
19498 match(Set dst ( OrReductionV src1 src2));
19499 match(Set dst (XorReductionV src1 src2));
19500 match(Set dst (MinReductionV src1 src2));
19501 match(Set dst (MaxReductionV src1 src2));
19502 effect(TEMP vtmp1, TEMP vtmp2);
19503 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19504 ins_encode %{
19505 int opcode = this->ideal_Opcode();
19506 int vlen = Matcher::vector_length(this, $src2);
19507 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19508 %}
19509 ins_pipe( pipe_slow );
19510 %}
19511
19512 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19513 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19514 match(Set dst (AddReductionVI src1 src2));
19515 match(Set dst (AndReductionV src1 src2));
19516 match(Set dst ( OrReductionV src1 src2));
19517 match(Set dst (XorReductionV src1 src2));
19518 match(Set dst (MinReductionV src1 src2));
19519 match(Set dst (MaxReductionV src1 src2));
19520 effect(TEMP vtmp1, TEMP vtmp2);
19521 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19522 ins_encode %{
19523 int opcode = this->ideal_Opcode();
19524 int vlen = Matcher::vector_length(this, $src2);
19525 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19526 %}
19527 ins_pipe( pipe_slow );
19528 %}
19529
19530 // =======================Short Reduction==========================================
19531
19532 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19533 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19534 match(Set dst (AddReductionVI src1 src2));
19535 match(Set dst (MulReductionVI src1 src2));
19536 match(Set dst (AndReductionV src1 src2));
19537 match(Set dst ( OrReductionV src1 src2));
19538 match(Set dst (XorReductionV src1 src2));
19539 match(Set dst (MinReductionV src1 src2));
19540 match(Set dst (MaxReductionV src1 src2));
19541 effect(TEMP vtmp1, TEMP vtmp2);
19542 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19543 ins_encode %{
19544 int opcode = this->ideal_Opcode();
19545 int vlen = Matcher::vector_length(this, $src2);
19546 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19547 %}
19548 ins_pipe( pipe_slow );
19549 %}
19550
19551 // =======================Mul Reduction==========================================
19552
19553 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19554 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19555 Matcher::vector_length(n->in(2)) <= 32); // src2
19556 match(Set dst (MulReductionVI src1 src2));
19557 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19558 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19559 ins_encode %{
19560 int opcode = this->ideal_Opcode();
19561 int vlen = Matcher::vector_length(this, $src2);
19562 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19563 %}
19564 ins_pipe( pipe_slow );
19565 %}
19566
19567 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19568 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19569 Matcher::vector_length(n->in(2)) == 64); // src2
19570 match(Set dst (MulReductionVI src1 src2));
19571 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19572 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19573 ins_encode %{
19574 int opcode = this->ideal_Opcode();
19575 int vlen = Matcher::vector_length(this, $src2);
19576 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19577 %}
19578 ins_pipe( pipe_slow );
19579 %}
19580
19581 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
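//
// The immF src1 variants match only when src1 is the identity of the reduction
// (+Inf for MIN, -Inf for MAX; see the bottom_type() checks in the predicates),
// so the generated code can reduce src2 alone: min(+Inf, min(v0..vn)) ==
// min(v0..vn). The *_av variants handle the general accumulator form, where
// dst carries a running value.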
19583 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19584 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19585 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19586 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19587 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19588 Matcher::vector_length(n->in(2)) == 2);
19589 match(Set dst (MinReductionV src1 src2));
19590 match(Set dst (MaxReductionV src1 src2));
19591 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19592 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19593 ins_encode %{
19594 assert(UseAVX > 0, "sanity");
19595
19596 int opcode = this->ideal_Opcode();
19597 int vlen = Matcher::vector_length(this, $src2);
19598 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19599 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19600 %}
19601 ins_pipe( pipe_slow );
19602 %}
19603
19604 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19605 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19606 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19607 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19608 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19609 Matcher::vector_length(n->in(2)) >= 4);
19610 match(Set dst (MinReductionV src1 src2));
19611 match(Set dst (MaxReductionV src1 src2));
19612 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19613 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19614 ins_encode %{
19615 assert(UseAVX > 0, "sanity");
19616
19617 int opcode = this->ideal_Opcode();
19618 int vlen = Matcher::vector_length(this, $src2);
19619 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19620 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19621 %}
19622 ins_pipe( pipe_slow );
19623 %}
19624
19625 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19626 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19627 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19628 Matcher::vector_length(n->in(2)) == 2);
19629 match(Set dst (MinReductionV dst src));
19630 match(Set dst (MaxReductionV dst src));
19631 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19632 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19633 ins_encode %{
19634 assert(UseAVX > 0, "sanity");
19635
19636 int opcode = this->ideal_Opcode();
19637 int vlen = Matcher::vector_length(this, $src);
19638 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19639 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19640 %}
19641 ins_pipe( pipe_slow );
19642 %}
19643
19644
19645 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19646 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19647 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19648 Matcher::vector_length(n->in(2)) >= 4);
19649 match(Set dst (MinReductionV dst src));
19650 match(Set dst (MaxReductionV dst src));
19651 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19652 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19653 ins_encode %{
19654 assert(UseAVX > 0, "sanity");
19655
19656 int opcode = this->ideal_Opcode();
19657 int vlen = Matcher::vector_length(this, $src);
19658 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19659 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19660 %}
19661 ins_pipe( pipe_slow );
19662 %}
19663
19664 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19665 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19666 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19667 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19668 Matcher::vector_length(n->in(2)) == 2);
19669 match(Set dst (MinReductionV src1 src2));
19670 match(Set dst (MaxReductionV src1 src2));
19671 effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19673 ins_encode %{
19674 int opcode = this->ideal_Opcode();
19675 int vlen = Matcher::vector_length(this, $src2);
19676 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19677 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19678 %}
19679 ins_pipe( pipe_slow );
19680 %}
19681
19682 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19683 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19684 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19685 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19686 Matcher::vector_length(n->in(2)) >= 4);
19687 match(Set dst (MinReductionV src1 src2));
19688 match(Set dst (MaxReductionV src1 src2));
19689 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19691 ins_encode %{
19692 int opcode = this->ideal_Opcode();
19693 int vlen = Matcher::vector_length(this, $src2);
19694 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19695 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19696 %}
19697 ins_pipe( pipe_slow );
19698 %}
19699
19700 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19701 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19702 Matcher::vector_length(n->in(2)) == 2);
19703 match(Set dst (MinReductionV dst src));
19704 match(Set dst (MaxReductionV dst src));
19705 effect(TEMP dst, TEMP xtmp1);
19706 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19707 ins_encode %{
19708 int opcode = this->ideal_Opcode();
19709 int vlen = Matcher::vector_length(this, $src);
19710 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19711 $xtmp1$$XMMRegister);
19712 %}
19713 ins_pipe( pipe_slow );
19714 %}
19715
19716 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19717 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19718 Matcher::vector_length(n->in(2)) >= 4);
19719 match(Set dst (MinReductionV dst src));
19720 match(Set dst (MaxReductionV dst src));
19721 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19723 ins_encode %{
19724 int opcode = this->ideal_Opcode();
19725 int vlen = Matcher::vector_length(this, $src);
19726 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19727 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19728 %}
19729 ins_pipe( pipe_slow );
19730 %}
19731
//--------------------Min/Max Double Reduction --------------------
19733 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19734 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19735 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19736 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19737 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19738 Matcher::vector_length(n->in(2)) == 2);
19739 match(Set dst (MinReductionV src1 src2));
19740 match(Set dst (MaxReductionV src1 src2));
19741 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19742 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19743 ins_encode %{
19744 assert(UseAVX > 0, "sanity");
19745
19746 int opcode = this->ideal_Opcode();
19747 int vlen = Matcher::vector_length(this, $src2);
19748 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19749 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19750 %}
19751 ins_pipe( pipe_slow );
19752 %}
19753
19754 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19755 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19756 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19757 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19758 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19759 Matcher::vector_length(n->in(2)) >= 4);
19760 match(Set dst (MinReductionV src1 src2));
19761 match(Set dst (MaxReductionV src1 src2));
19762 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19763 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19764 ins_encode %{
19765 assert(UseAVX > 0, "sanity");
19766
19767 int opcode = this->ideal_Opcode();
19768 int vlen = Matcher::vector_length(this, $src2);
19769 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19770 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19771 %}
19772 ins_pipe( pipe_slow );
19773 %}
19774
19775
19776 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19777 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19778 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19779 Matcher::vector_length(n->in(2)) == 2);
19780 match(Set dst (MinReductionV dst src));
19781 match(Set dst (MaxReductionV dst src));
19782 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19783 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19784 ins_encode %{
19785 assert(UseAVX > 0, "sanity");
19786
19787 int opcode = this->ideal_Opcode();
19788 int vlen = Matcher::vector_length(this, $src);
19789 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19790 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19791 %}
19792 ins_pipe( pipe_slow );
19793 %}
19794
19795 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19796 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19797 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19798 Matcher::vector_length(n->in(2)) >= 4);
19799 match(Set dst (MinReductionV dst src));
19800 match(Set dst (MaxReductionV dst src));
19801 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19802 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19803 ins_encode %{
19804 assert(UseAVX > 0, "sanity");
19805
19806 int opcode = this->ideal_Opcode();
19807 int vlen = Matcher::vector_length(this, $src);
19808 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19809 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19810 %}
19811 ins_pipe( pipe_slow );
19812 %}
19813
19814 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19815 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19816 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19817 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19818 Matcher::vector_length(n->in(2)) == 2);
19819 match(Set dst (MinReductionV src1 src2));
19820 match(Set dst (MaxReductionV src1 src2));
19821 effect(TEMP dst, TEMP xtmp1);
19822 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19823 ins_encode %{
19824 int opcode = this->ideal_Opcode();
19825 int vlen = Matcher::vector_length(this, $src2);
19826 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19827 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19828 %}
19829 ins_pipe( pipe_slow );
19830 %}
19831
19832 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19833 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19834 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19835 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19836 Matcher::vector_length(n->in(2)) >= 4);
19837 match(Set dst (MinReductionV src1 src2));
19838 match(Set dst (MaxReductionV src1 src2));
19839 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19840 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19841 ins_encode %{
19842 int opcode = this->ideal_Opcode();
19843 int vlen = Matcher::vector_length(this, $src2);
19844 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19845 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19846 %}
19847 ins_pipe( pipe_slow );
19848 %}
19849
19850
19851 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
19852 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19853 Matcher::vector_length(n->in(2)) == 2);
19854 match(Set dst (MinReductionV dst src));
19855 match(Set dst (MaxReductionV dst src));
19856 effect(TEMP dst, TEMP xtmp1);
19857 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
19858 ins_encode %{
19859 int opcode = this->ideal_Opcode();
19860 int vlen = Matcher::vector_length(this, $src);
19861 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19862 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19863 %}
19864 ins_pipe( pipe_slow );
19865 %}
19866
19867 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
19868 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19869 Matcher::vector_length(n->in(2)) >= 4);
19870 match(Set dst (MinReductionV dst src));
19871 match(Set dst (MaxReductionV dst src));
19872 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19873 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
19874 ins_encode %{
19875 int opcode = this->ideal_Opcode();
19876 int vlen = Matcher::vector_length(this, $src);
19877 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19878 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19879 %}
19880 ins_pipe( pipe_slow );
19881 %}
19882
19883 // ====================VECTOR ARITHMETIC=======================================
19884
19885 // --------------------------------- ADD --------------------------------------
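// Each element type below follows the same three-rule pattern: a destructive
// two-operand SSE form (UseAVX == 0), a three-operand AVX register form, and
// an AVX form with a memory operand. The memory form is restricted to vectors
// wider than 8 bytes, since a sub-128-bit vector would otherwise read more
// memory than the Java vector occupies.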
19886
19887 // Bytes vector add
19888 instruct vaddB(vec dst, vec src) %{
19889 predicate(UseAVX == 0);
19890 match(Set dst (AddVB dst src));
19891 format %{ "paddb $dst,$src\t! add packedB" %}
19892 ins_encode %{
19893 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
19894 %}
19895 ins_pipe( pipe_slow );
19896 %}
19897
19898 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
19899 predicate(UseAVX > 0);
19900 match(Set dst (AddVB src1 src2));
19901 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
19902 ins_encode %{
19903 int vlen_enc = vector_length_encoding(this);
19904 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19905 %}
19906 ins_pipe( pipe_slow );
19907 %}
19908
19909 instruct vaddB_mem(vec dst, vec src, memory mem) %{
19910 predicate((UseAVX > 0) &&
19911 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19912 match(Set dst (AddVB src (LoadVector mem)));
19913 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
19914 ins_encode %{
19915 int vlen_enc = vector_length_encoding(this);
19916 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19917 %}
19918 ins_pipe( pipe_slow );
19919 %}
19920
19921 // Shorts/Chars vector add
19922 instruct vaddS(vec dst, vec src) %{
19923 predicate(UseAVX == 0);
19924 match(Set dst (AddVS dst src));
19925 format %{ "paddw $dst,$src\t! add packedS" %}
19926 ins_encode %{
19927 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
19928 %}
19929 ins_pipe( pipe_slow );
19930 %}
19931
19932 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
19933 predicate(UseAVX > 0);
19934 match(Set dst (AddVS src1 src2));
19935 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
19936 ins_encode %{
19937 int vlen_enc = vector_length_encoding(this);
19938 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19939 %}
19940 ins_pipe( pipe_slow );
19941 %}
19942
19943 instruct vaddS_mem(vec dst, vec src, memory mem) %{
19944 predicate((UseAVX > 0) &&
19945 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19946 match(Set dst (AddVS src (LoadVector mem)));
19947 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
19948 ins_encode %{
19949 int vlen_enc = vector_length_encoding(this);
19950 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19951 %}
19952 ins_pipe( pipe_slow );
19953 %}
19954
19955 // Integers vector add
19956 instruct vaddI(vec dst, vec src) %{
19957 predicate(UseAVX == 0);
19958 match(Set dst (AddVI dst src));
19959 format %{ "paddd $dst,$src\t! add packedI" %}
19960 ins_encode %{
19961 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
19962 %}
19963 ins_pipe( pipe_slow );
19964 %}
19965
19966 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
19967 predicate(UseAVX > 0);
19968 match(Set dst (AddVI src1 src2));
19969 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
19970 ins_encode %{
19971 int vlen_enc = vector_length_encoding(this);
19972 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
19973 %}
19974 ins_pipe( pipe_slow );
19975 %}
19976
19977
19978 instruct vaddI_mem(vec dst, vec src, memory mem) %{
19979 predicate((UseAVX > 0) &&
19980 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
19981 match(Set dst (AddVI src (LoadVector mem)));
19982 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
19983 ins_encode %{
19984 int vlen_enc = vector_length_encoding(this);
19985 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
19986 %}
19987 ins_pipe( pipe_slow );
19988 %}
19989
19990 // Longs vector add
19991 instruct vaddL(vec dst, vec src) %{
19992 predicate(UseAVX == 0);
19993 match(Set dst (AddVL dst src));
19994 format %{ "paddq $dst,$src\t! add packedL" %}
19995 ins_encode %{
19996 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
19997 %}
19998 ins_pipe( pipe_slow );
19999 %}
20000
20001 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20002 predicate(UseAVX > 0);
20003 match(Set dst (AddVL src1 src2));
20004 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20005 ins_encode %{
20006 int vlen_enc = vector_length_encoding(this);
20007 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20008 %}
20009 ins_pipe( pipe_slow );
20010 %}
20011
20012 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20013 predicate((UseAVX > 0) &&
20014 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20015 match(Set dst (AddVL src (LoadVector mem)));
20016 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20017 ins_encode %{
20018 int vlen_enc = vector_length_encoding(this);
20019 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20020 %}
20021 ins_pipe( pipe_slow );
20022 %}
20023
20024 // Floats vector add
20025 instruct vaddF(vec dst, vec src) %{
20026 predicate(UseAVX == 0);
20027 match(Set dst (AddVF dst src));
20028 format %{ "addps $dst,$src\t! add packedF" %}
20029 ins_encode %{
20030 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20031 %}
20032 ins_pipe( pipe_slow );
20033 %}
20034
20035 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20036 predicate(UseAVX > 0);
20037 match(Set dst (AddVF src1 src2));
20038 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20039 ins_encode %{
20040 int vlen_enc = vector_length_encoding(this);
20041 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20042 %}
20043 ins_pipe( pipe_slow );
20044 %}
20045
20046 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20047 predicate((UseAVX > 0) &&
20048 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20049 match(Set dst (AddVF src (LoadVector mem)));
20050 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20051 ins_encode %{
20052 int vlen_enc = vector_length_encoding(this);
20053 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20054 %}
20055 ins_pipe( pipe_slow );
20056 %}
20057
20058 // Doubles vector add
20059 instruct vaddD(vec dst, vec src) %{
20060 predicate(UseAVX == 0);
20061 match(Set dst (AddVD dst src));
20062 format %{ "addpd $dst,$src\t! add packedD" %}
20063 ins_encode %{
20064 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20065 %}
20066 ins_pipe( pipe_slow );
20067 %}
20068
20069 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20070 predicate(UseAVX > 0);
20071 match(Set dst (AddVD src1 src2));
20072 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20073 ins_encode %{
20074 int vlen_enc = vector_length_encoding(this);
20075 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20076 %}
20077 ins_pipe( pipe_slow );
20078 %}
20079
20080 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20081 predicate((UseAVX > 0) &&
20082 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20083 match(Set dst (AddVD src (LoadVector mem)));
20084 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20085 ins_encode %{
20086 int vlen_enc = vector_length_encoding(this);
20087 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20088 %}
20089 ins_pipe( pipe_slow );
20090 %}
20091
20092 // --------------------------------- SUB --------------------------------------
20093
20094 // Bytes vector sub
20095 instruct vsubB(vec dst, vec src) %{
20096 predicate(UseAVX == 0);
20097 match(Set dst (SubVB dst src));
20098 format %{ "psubb $dst,$src\t! sub packedB" %}
20099 ins_encode %{
20100 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20101 %}
20102 ins_pipe( pipe_slow );
20103 %}
20104
20105 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20106 predicate(UseAVX > 0);
20107 match(Set dst (SubVB src1 src2));
20108 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20109 ins_encode %{
20110 int vlen_enc = vector_length_encoding(this);
20111 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20112 %}
20113 ins_pipe( pipe_slow );
20114 %}
20115
20116 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20117 predicate((UseAVX > 0) &&
20118 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20119 match(Set dst (SubVB src (LoadVector mem)));
20120 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20121 ins_encode %{
20122 int vlen_enc = vector_length_encoding(this);
20123 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20124 %}
20125 ins_pipe( pipe_slow );
20126 %}
20127
20128 // Shorts/Chars vector sub
20129 instruct vsubS(vec dst, vec src) %{
20130 predicate(UseAVX == 0);
20131 match(Set dst (SubVS dst src));
20132 format %{ "psubw $dst,$src\t! sub packedS" %}
20133 ins_encode %{
20134 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20135 %}
20136 ins_pipe( pipe_slow );
20137 %}
20138
20139
20140 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20141 predicate(UseAVX > 0);
20142 match(Set dst (SubVS src1 src2));
20143 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20144 ins_encode %{
20145 int vlen_enc = vector_length_encoding(this);
20146 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20147 %}
20148 ins_pipe( pipe_slow );
20149 %}
20150
20151 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20152 predicate((UseAVX > 0) &&
20153 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20154 match(Set dst (SubVS src (LoadVector mem)));
20155 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20156 ins_encode %{
20157 int vlen_enc = vector_length_encoding(this);
20158 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20159 %}
20160 ins_pipe( pipe_slow );
20161 %}
20162
20163 // Integers vector sub
20164 instruct vsubI(vec dst, vec src) %{
20165 predicate(UseAVX == 0);
20166 match(Set dst (SubVI dst src));
20167 format %{ "psubd $dst,$src\t! sub packedI" %}
20168 ins_encode %{
20169 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20170 %}
20171 ins_pipe( pipe_slow );
20172 %}
20173
20174 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20175 predicate(UseAVX > 0);
20176 match(Set dst (SubVI src1 src2));
20177 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20178 ins_encode %{
20179 int vlen_enc = vector_length_encoding(this);
20180 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20181 %}
20182 ins_pipe( pipe_slow );
20183 %}
20184
20185 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20186 predicate((UseAVX > 0) &&
20187 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20188 match(Set dst (SubVI src (LoadVector mem)));
20189 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20190 ins_encode %{
20191 int vlen_enc = vector_length_encoding(this);
20192 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20193 %}
20194 ins_pipe( pipe_slow );
20195 %}
20196
20197 // Longs vector sub
20198 instruct vsubL(vec dst, vec src) %{
20199 predicate(UseAVX == 0);
20200 match(Set dst (SubVL dst src));
20201 format %{ "psubq $dst,$src\t! sub packedL" %}
20202 ins_encode %{
20203 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20204 %}
20205 ins_pipe( pipe_slow );
20206 %}
20207
20208 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20209 predicate(UseAVX > 0);
20210 match(Set dst (SubVL src1 src2));
20211 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20212 ins_encode %{
20213 int vlen_enc = vector_length_encoding(this);
20214 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20215 %}
20216 ins_pipe( pipe_slow );
20217 %}
20218
20219
20220 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20221 predicate((UseAVX > 0) &&
20222 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20223 match(Set dst (SubVL src (LoadVector mem)));
20224 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20225 ins_encode %{
20226 int vlen_enc = vector_length_encoding(this);
20227 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20228 %}
20229 ins_pipe( pipe_slow );
20230 %}
20231
20232 // Floats vector sub
20233 instruct vsubF(vec dst, vec src) %{
20234 predicate(UseAVX == 0);
20235 match(Set dst (SubVF dst src));
20236 format %{ "subps $dst,$src\t! sub packedF" %}
20237 ins_encode %{
20238 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20239 %}
20240 ins_pipe( pipe_slow );
20241 %}
20242
20243 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20244 predicate(UseAVX > 0);
20245 match(Set dst (SubVF src1 src2));
20246 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20247 ins_encode %{
20248 int vlen_enc = vector_length_encoding(this);
20249 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20250 %}
20251 ins_pipe( pipe_slow );
20252 %}
20253
20254 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20255 predicate((UseAVX > 0) &&
20256 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20257 match(Set dst (SubVF src (LoadVector mem)));
20258 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20259 ins_encode %{
20260 int vlen_enc = vector_length_encoding(this);
20261 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20262 %}
20263 ins_pipe( pipe_slow );
20264 %}
20265
20266 // Doubles vector sub
20267 instruct vsubD(vec dst, vec src) %{
20268 predicate(UseAVX == 0);
20269 match(Set dst (SubVD dst src));
20270 format %{ "subpd $dst,$src\t! sub packedD" %}
20271 ins_encode %{
20272 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20273 %}
20274 ins_pipe( pipe_slow );
20275 %}
20276
20277 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20278 predicate(UseAVX > 0);
20279 match(Set dst (SubVD src1 src2));
20280 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20281 ins_encode %{
20282 int vlen_enc = vector_length_encoding(this);
20283 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20284 %}
20285 ins_pipe( pipe_slow );
20286 %}
20287
20288 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20289 predicate((UseAVX > 0) &&
20290 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20291 match(Set dst (SubVD src (LoadVector mem)));
20292 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20293 ins_encode %{
20294 int vlen_enc = vector_length_encoding(this);
20295 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20296 %}
20297 ins_pipe( pipe_slow );
20298 %}
20299
20300 // --------------------------------- MUL --------------------------------------
20301
20302 // Byte vector mul
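// x86 has no packed byte multiply, so the rules below form 16-bit products
// (via pmovsxbw widening or odd/even lane splitting) with pmullw/vpmullw and
// keep only the low byte of each product. Per byte lane this computes (a
// sketch of the semantics, not the emitted code):
//   r[i] = (byte)((short)a[i] * (short)b[i]);   // low 8 bits only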
20303 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20304 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20305 match(Set dst (MulVB src1 src2));
20306 effect(TEMP dst, TEMP xtmp);
20307 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20308 ins_encode %{
20309 assert(UseSSE > 3, "required");
20310 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20311 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20312 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20313 __ psllw($dst$$XMMRegister, 8);
20314 __ psrlw($dst$$XMMRegister, 8);
20315 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20316 %}
20317 ins_pipe( pipe_slow );
20318 %}
20319
20320 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20321 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20322 match(Set dst (MulVB src1 src2));
20323 effect(TEMP dst, TEMP xtmp);
20324 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20325 ins_encode %{
20326 assert(UseSSE > 3, "required");
20327 // Odd-index elements
20328 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20329 __ psrlw($dst$$XMMRegister, 8);
20330 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20331 __ psrlw($xtmp$$XMMRegister, 8);
20332 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20333 __ psllw($dst$$XMMRegister, 8);
20334 // Even-index elements
20335 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20336 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20337 __ psllw($xtmp$$XMMRegister, 8);
20338 __ psrlw($xtmp$$XMMRegister, 8);
20339 // Combine
20340 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20341 %}
20342 ins_pipe( pipe_slow );
20343 %}
20344
20345 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20346 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20347 match(Set dst (MulVB src1 src2));
20348 effect(TEMP xtmp1, TEMP xtmp2);
20349 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20350 ins_encode %{
20351 int vlen_enc = vector_length_encoding(this);
20352 // Odd-index elements
20353 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20354 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20355 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20356 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20357 // Even-index elements
20358 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20359 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20360 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20361 // Combine
20362 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20363 %}
20364 ins_pipe( pipe_slow );
20365 %}
20366
20367 // Shorts/Chars vector mul
20368 instruct vmulS(vec dst, vec src) %{
20369 predicate(UseAVX == 0);
20370 match(Set dst (MulVS dst src));
20371 format %{ "pmullw $dst,$src\t! mul packedS" %}
20372 ins_encode %{
20373 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20374 %}
20375 ins_pipe( pipe_slow );
20376 %}
20377
20378 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20379 predicate(UseAVX > 0);
20380 match(Set dst (MulVS src1 src2));
20381 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20382 ins_encode %{
20383 int vlen_enc = vector_length_encoding(this);
20384 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20385 %}
20386 ins_pipe( pipe_slow );
20387 %}
20388
20389 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20390 predicate((UseAVX > 0) &&
20391 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20392 match(Set dst (MulVS src (LoadVector mem)));
20393 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20394 ins_encode %{
20395 int vlen_enc = vector_length_encoding(this);
20396 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20397 %}
20398 ins_pipe( pipe_slow );
20399 %}
20400
20401 // Integers vector mul
20402 instruct vmulI(vec dst, vec src) %{
20403 predicate(UseAVX == 0);
20404 match(Set dst (MulVI dst src));
20405 format %{ "pmulld $dst,$src\t! mul packedI" %}
20406 ins_encode %{
20407 assert(UseSSE > 3, "required");
20408 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20409 %}
20410 ins_pipe( pipe_slow );
20411 %}
20412
20413 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20414 predicate(UseAVX > 0);
20415 match(Set dst (MulVI src1 src2));
20416 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20417 ins_encode %{
20418 int vlen_enc = vector_length_encoding(this);
20419 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20420 %}
20421 ins_pipe( pipe_slow );
20422 %}
20423
20424 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20425 predicate((UseAVX > 0) &&
20426 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20427 match(Set dst (MulVI src (LoadVector mem)));
20428 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20429 ins_encode %{
20430 int vlen_enc = vector_length_encoding(this);
20431 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20432 %}
20433 ins_pipe( pipe_slow );
20434 %}
20435
20436 // Longs vector mul
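// Without AVX-512DQ's vpmullq, the 64-bit multiply is decomposed into 32-bit
// multiplies. Writing each lane as a = a_hi * 2^32 + a_lo, the low 64 bits of
// the product are ((a_hi * b_lo + a_lo * b_hi) << 32) + a_lo * b_lo; the
// a_hi * b_hi term is shifted out of range entirely. A scalar sketch of the
// identity used by the non-DQ rules below (illustrative only):
//   uint64_t mul64_lo(uint64_t a, uint64_t b) {
//     uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
//     uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;
//     return ((a_hi * b_lo + a_lo * b_hi) << 32) + a_lo * b_lo;
//   }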
20437 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20438 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20439 VM_Version::supports_avx512dq()) ||
20440 VM_Version::supports_avx512vldq());
20441 match(Set dst (MulVL src1 src2));
20442 ins_cost(500);
20443 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20444 ins_encode %{
20445 assert(UseAVX > 2, "required");
20446 int vlen_enc = vector_length_encoding(this);
20447 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20448 %}
20449 ins_pipe( pipe_slow );
20450 %}
20451
20452 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20453 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20454 VM_Version::supports_avx512dq()) ||
20455 (Matcher::vector_length_in_bytes(n) > 8 &&
20456 VM_Version::supports_avx512vldq()));
20457 match(Set dst (MulVL src (LoadVector mem)));
20458 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20459 ins_cost(500);
20460 ins_encode %{
20461 assert(UseAVX > 2, "required");
20462 int vlen_enc = vector_length_encoding(this);
20463 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20464 %}
20465 ins_pipe( pipe_slow );
20466 %}
20467
20468 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20469 predicate(UseAVX == 0);
20470 match(Set dst (MulVL src1 src2));
20471 ins_cost(500);
20472 effect(TEMP dst, TEMP xtmp);
20473 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20474 ins_encode %{
20475 assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi cross products; only their lower 32 bits are of interest
20477 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20478 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20479 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20480 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20481 __ psllq($dst$$XMMRegister, 32);
20482 // Get the lo-lo products
20483 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20484 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20485 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20486 %}
20487 ins_pipe( pipe_slow );
20488 %}
20489
20490 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20491 predicate(UseAVX > 0 &&
20492 ((Matcher::vector_length_in_bytes(n) == 64 &&
20493 !VM_Version::supports_avx512dq()) ||
20494 (Matcher::vector_length_in_bytes(n) < 64 &&
20495 !VM_Version::supports_avx512vldq())));
20496 match(Set dst (MulVL src1 src2));
20497 effect(TEMP xtmp1, TEMP xtmp2);
20498 ins_cost(500);
20499 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20500 ins_encode %{
20501 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are of interest
20503 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20504 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20505 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20506 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20507 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20508 // Get the lo-lo products
20509 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20510 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20511 %}
20512 ins_pipe( pipe_slow );
20513 %}
20514
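// When both operands are known to be zero- or sign-extended 32-bit values
// (has_uint_inputs()/has_int_inputs()), a single vpmuludq/vpmuldq already
// yields the exact 64-bit product, hence the lower ins_cost than the general
// decomposition above.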
20515 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20516 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20517 match(Set dst (MulVL src1 src2));
20518 ins_cost(100);
20519 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20520 ins_encode %{
20521 int vlen_enc = vector_length_encoding(this);
20522 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20523 %}
20524 ins_pipe( pipe_slow );
20525 %}
20526
20527 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20528 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20529 match(Set dst (MulVL src1 src2));
20530 ins_cost(100);
20531 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20532 ins_encode %{
20533 int vlen_enc = vector_length_encoding(this);
20534 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20535 %}
20536 ins_pipe( pipe_slow );
20537 %}
20538
20539 // Floats vector mul
20540 instruct vmulF(vec dst, vec src) %{
20541 predicate(UseAVX == 0);
20542 match(Set dst (MulVF dst src));
20543 format %{ "mulps $dst,$src\t! mul packedF" %}
20544 ins_encode %{
20545 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20546 %}
20547 ins_pipe( pipe_slow );
20548 %}
20549
20550 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20551 predicate(UseAVX > 0);
20552 match(Set dst (MulVF src1 src2));
20553 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20554 ins_encode %{
20555 int vlen_enc = vector_length_encoding(this);
20556 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20557 %}
20558 ins_pipe( pipe_slow );
20559 %}
20560
20561 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20562 predicate((UseAVX > 0) &&
20563 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20564 match(Set dst (MulVF src (LoadVector mem)));
20565 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20566 ins_encode %{
20567 int vlen_enc = vector_length_encoding(this);
20568 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20569 %}
20570 ins_pipe( pipe_slow );
20571 %}
20572
20573 // Doubles vector mul
20574 instruct vmulD(vec dst, vec src) %{
20575 predicate(UseAVX == 0);
20576 match(Set dst (MulVD dst src));
20577 format %{ "mulpd $dst,$src\t! mul packedD" %}
20578 ins_encode %{
20579 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20580 %}
20581 ins_pipe( pipe_slow );
20582 %}
20583
20584 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20585 predicate(UseAVX > 0);
20586 match(Set dst (MulVD src1 src2));
20587 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20588 ins_encode %{
20589 int vlen_enc = vector_length_encoding(this);
20590 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20591 %}
20592 ins_pipe( pipe_slow );
20593 %}
20594
20595 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20596 predicate((UseAVX > 0) &&
20597 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20598 match(Set dst (MulVD src (LoadVector mem)));
20599 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20600 ins_encode %{
20601 int vlen_enc = vector_length_encoding(this);
20602 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20603 %}
20604 ins_pipe( pipe_slow );
20605 %}
20606
20607 // --------------------------------- DIV --------------------------------------
20608
20609 // Floats vector div
20610 instruct vdivF(vec dst, vec src) %{
20611 predicate(UseAVX == 0);
20612 match(Set dst (DivVF dst src));
20613 format %{ "divps $dst,$src\t! div packedF" %}
20614 ins_encode %{
20615 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20616 %}
20617 ins_pipe( pipe_slow );
20618 %}
20619
20620 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20621 predicate(UseAVX > 0);
20622 match(Set dst (DivVF src1 src2));
20623 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20624 ins_encode %{
20625 int vlen_enc = vector_length_encoding(this);
20626 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20627 %}
20628 ins_pipe( pipe_slow );
20629 %}
20630
20631 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20632 predicate((UseAVX > 0) &&
20633 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20634 match(Set dst (DivVF src (LoadVector mem)));
20635 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20636 ins_encode %{
20637 int vlen_enc = vector_length_encoding(this);
20638 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20639 %}
20640 ins_pipe( pipe_slow );
20641 %}
20642
20643 // Doubles vector div
20644 instruct vdivD(vec dst, vec src) %{
20645 predicate(UseAVX == 0);
20646 match(Set dst (DivVD dst src));
20647 format %{ "divpd $dst,$src\t! div packedD" %}
20648 ins_encode %{
20649 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20650 %}
20651 ins_pipe( pipe_slow );
20652 %}
20653
20654 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20655 predicate(UseAVX > 0);
20656 match(Set dst (DivVD src1 src2));
20657 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20658 ins_encode %{
20659 int vlen_enc = vector_length_encoding(this);
20660 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20661 %}
20662 ins_pipe( pipe_slow );
20663 %}
20664
20665 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20666 predicate((UseAVX > 0) &&
20667 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20668 match(Set dst (DivVD src (LoadVector mem)));
20669 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20670 ins_encode %{
20671 int vlen_enc = vector_length_encoding(this);
20672 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20673 %}
20674 ins_pipe( pipe_slow );
20675 %}
20676
20677 // ------------------------------ MinMax ---------------------------------------
20678
20679 // Byte, Short, Int vector Min/Max
20680 instruct minmax_reg_sse(vec dst, vec src) %{
20681 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20682 UseAVX == 0);
20683 match(Set dst (MinV dst src));
20684 match(Set dst (MaxV dst src));
20685 format %{ "vector_minmax $dst,$src\t! " %}
20686 ins_encode %{
20687 assert(UseSSE >= 4, "required");
20688
20689 int opcode = this->ideal_Opcode();
20690 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20691 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20692 %}
20693 ins_pipe( pipe_slow );
20694 %}
20695
20696 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20697 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20698 UseAVX > 0);
20699 match(Set dst (MinV src1 src2));
20700 match(Set dst (MaxV src1 src2));
20701 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20702 ins_encode %{
20703 int opcode = this->ideal_Opcode();
20704 int vlen_enc = vector_length_encoding(this);
20705 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20706
20707 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20708 %}
20709 ins_pipe( pipe_slow );
20710 %}
20711
20712 // Long vector Min/Max
20713 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20714 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20715 UseAVX == 0);
20716 match(Set dst (MinV dst src));
20717 match(Set dst (MaxV src dst));
20718 effect(TEMP dst, TEMP tmp);
20719 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20720 ins_encode %{
20721 assert(UseSSE >= 4, "required");
20722
20723 int opcode = this->ideal_Opcode();
20724 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20725 assert(elem_bt == T_LONG, "sanity");
20726
20727 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20728 %}
20729 ins_pipe( pipe_slow );
20730 %}
20731
20732 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20733 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20734 UseAVX > 0 && !VM_Version::supports_avx512vl());
20735 match(Set dst (MinV src1 src2));
20736 match(Set dst (MaxV src1 src2));
20737 effect(TEMP dst);
20738 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20739 ins_encode %{
20740 int vlen_enc = vector_length_encoding(this);
20741 int opcode = this->ideal_Opcode();
20742 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20743 assert(elem_bt == T_LONG, "sanity");
20744
20745 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20746 %}
20747 ins_pipe( pipe_slow );
20748 %}
20749
20750 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20751 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20752 Matcher::vector_element_basic_type(n) == T_LONG);
20753 match(Set dst (MinV src1 src2));
20754 match(Set dst (MaxV src1 src2));
20755 format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
20756 ins_encode %{
20757 assert(UseAVX > 2, "required");
20758
20759 int vlen_enc = vector_length_encoding(this);
20760 int opcode = this->ideal_Opcode();
20761 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20762 assert(elem_bt == T_LONG, "sanity");
20763
20764 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20765 %}
20766 ins_pipe( pipe_slow );
20767 %}
20768
20769 // Float/Double vector Min/Max
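// Java's Math.min/max semantics differ from raw minps/maxps: a NaN in either
// input must produce NaN, and -0.0 must order below +0.0, whereas the legacy
// instructions simply return the second operand for unordered or equal
// inputs. The AVX10.2 rule below uses the new vminmax forms, which can
// express these semantics directly; the older paths blend through
// temporaries instead. A scalar sketch of the required min semantics
// (illustrative only):
//   float java_min(float a, float b) {
//     if (a != a || b != b) return a + b;   // NaN propagates
//     if (a == 0.0f && b == 0.0f)           // -0.0 orders below +0.0
//       return (signbit(a) || signbit(b)) ? -0.0f : 0.0f;
//     return (a < b) ? a : b;
//   }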
20770 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20771 predicate(VM_Version::supports_avx10_2() &&
20772 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20773 match(Set dst (MinV a b));
20774 match(Set dst (MaxV a b));
20775 format %{ "vector_minmaxFP $dst, $a, $b" %}
20776 ins_encode %{
20777 int vlen_enc = vector_length_encoding(this);
20778 int opcode = this->ideal_Opcode();
20779 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20780 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20781 %}
20782 ins_pipe( pipe_slow );
20783 %}
20784
20785 // Float/Double vector Min/Max
20786 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20787 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20788 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20789 UseAVX > 0);
20790 match(Set dst (MinV a b));
20791 match(Set dst (MaxV a b));
20792 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20793 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20794 ins_encode %{
20795 assert(UseAVX > 0, "required");
20796
20797 int opcode = this->ideal_Opcode();
20798 int vlen_enc = vector_length_encoding(this);
20799 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20800
20801 __ vminmax_fp(opcode, elem_bt,
20802 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20803 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20804 %}
20805 ins_pipe( pipe_slow );
20806 %}
20807
20808 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20809 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20810 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20811 match(Set dst (MinV a b));
20812 match(Set dst (MaxV a b));
20813 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20814 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20815 ins_encode %{
20816 assert(UseAVX > 2, "required");
20817
20818 int opcode = this->ideal_Opcode();
20819 int vlen_enc = vector_length_encoding(this);
20820 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20821
20822 __ evminmax_fp(opcode, elem_bt,
20823 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20824 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20825 %}
20826 ins_pipe( pipe_slow );
20827 %}
20828
20829 // ------------------------------ Unsigned vector Min/Max ----------------------
20830
20831 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20832 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20833 match(Set dst (UMinV a b));
20834 match(Set dst (UMaxV a b));
20835 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20836 ins_encode %{
20837 int opcode = this->ideal_Opcode();
20838 int vlen_enc = vector_length_encoding(this);
20839 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20840 assert(is_integral_type(elem_bt), "");
20841 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20842 %}
20843 ins_pipe( pipe_slow );
20844 %}
20845
20846 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20847 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20848 match(Set dst (UMinV a (LoadVector b)));
20849 match(Set dst (UMaxV a (LoadVector b)));
20850 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20851 ins_encode %{
20852 int opcode = this->ideal_Opcode();
20853 int vlen_enc = vector_length_encoding(this);
20854 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20855 assert(is_integral_type(elem_bt), "");
20856 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
20857 %}
20858 ins_pipe( pipe_slow );
20859 %}
20860
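// Without AVX-512VL there is no packed unsigned 64-bit min/max instruction.
// The standard reduction to a signed compare is to flip the sign bits first;
// a sketch of the identity (the helper may differ in detail):
//   uint64_t umin(uint64_t a, uint64_t b) {
//     const uint64_t MSB = 0x8000000000000000ULL;
//     return ((int64_t)(a ^ MSB) < (int64_t)(b ^ MSB)) ? a : b;
//   }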
20861 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20862 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20863 match(Set dst (UMinV a b));
20864 match(Set dst (UMaxV a b));
20865 effect(TEMP xtmp1, TEMP xtmp2);
20866 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
20867 ins_encode %{
20868 int opcode = this->ideal_Opcode();
20869 int vlen_enc = vector_length_encoding(this);
20870 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20871 %}
20872 ins_pipe( pipe_slow );
20873 %}
20874
20875 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20876 match(Set dst (UMinV (Binary dst src2) mask));
20877 match(Set dst (UMaxV (Binary dst src2) mask));
20878 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20879 ins_encode %{
20880 int vlen_enc = vector_length_encoding(this);
20881 BasicType bt = Matcher::vector_element_basic_type(this);
20882 int opc = this->ideal_Opcode();
20883 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20884 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
20885 %}
20886 ins_pipe( pipe_slow );
20887 %}
20888
20889 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20890 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20891 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20892 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20893 ins_encode %{
20894 int vlen_enc = vector_length_encoding(this);
20895 BasicType bt = Matcher::vector_element_basic_type(this);
20896 int opc = this->ideal_Opcode();
20897 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
20898 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
20899 %}
20900 ins_pipe( pipe_slow );
20901 %}
20902
20903 // --------------------------------- Signum/CopySign ---------------------------
20904
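// Math.signum returns the argument unchanged for zeros (of either sign) and
// NaN, and +/-1.0 otherwise; the zero and one operands carry those constants
// for the stub to blend by sign. A scalar sketch of the semantics
// (illustrative only):
//   float signum(float x) {
//     return (x != x || x == 0.0f) ? x : (x < 0.0f ? -1.0f : 1.0f);
//   }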
20905 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20906 match(Set dst (SignumF dst (Binary zero one)));
20907 effect(KILL cr);
20908 format %{ "signumF $dst, $dst" %}
20909 ins_encode %{
20910 int opcode = this->ideal_Opcode();
20911 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20912 %}
20913 ins_pipe( pipe_slow );
20914 %}
20915
20916 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20917 match(Set dst (SignumD dst (Binary zero one)));
20918 effect(KILL cr);
20919 format %{ "signumD $dst, $dst" %}
20920 ins_encode %{
20921 int opcode = this->ideal_Opcode();
20922 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20923 %}
20924 ins_pipe( pipe_slow );
20925 %}
20926
20927 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20928 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20929 match(Set dst (SignumVF src (Binary zero one)));
20930 match(Set dst (SignumVD src (Binary zero one)));
20931 effect(TEMP dst, TEMP xtmp1);
20932 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20933 ins_encode %{
20934 int opcode = this->ideal_Opcode();
20935 int vec_enc = vector_length_encoding(this);
20936 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20937 $xtmp1$$XMMRegister, vec_enc);
20938 %}
20939 ins_pipe( pipe_slow );
20940 %}
20941
20942 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20943 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20944 match(Set dst (SignumVF src (Binary zero one)));
20945 match(Set dst (SignumVD src (Binary zero one)));
20946 effect(TEMP dst, TEMP ktmp1);
20947 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20948 ins_encode %{
20949 int opcode = this->ideal_Opcode();
20950 int vec_enc = vector_length_encoding(this);
20951 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20952 $ktmp1$$KRegister, vec_enc);
20953 %}
20954 ins_pipe( pipe_slow );
20955 %}
20956
20957 // ---------------------------------------
// For copySign use 0xE4 as the immediate (truth-table selector) for vpternlog
20959 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20960 // C (xmm2) is set to 0x7FFFFFFF
20961 // Wherever xmm2 is 0, we want to pick from B (sign)
20962 // Wherever xmm2 is 1, we want to pick from A (src)
20963 //
20964 // A B C Result
20965 // 0 0 0 0
20966 // 0 0 1 0
20967 // 0 1 0 1
20968 // 0 1 1 0
20969 // 1 0 0 0
20970 // 1 0 1 1
20971 // 1 1 0 1
20972 // 1 1 1 1
20973 //
20974 // Result going from high bit to low bit is 0x11100100 = 0xe4
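// (i.e. Result = (A & C) | (B & ~C): magnitude bits come from A, the sign
//  bit comes from B)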
20975 // ---------------------------------------
20976
20977 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20978 match(Set dst (CopySignF dst src));
20979 effect(TEMP tmp1, TEMP tmp2);
20980 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20981 ins_encode %{
20982 __ movl($tmp2$$Register, 0x7FFFFFFF);
20983 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
20984 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20985 %}
20986 ins_pipe( pipe_slow );
20987 %}
20988
20989 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20990 match(Set dst (CopySignD dst (Binary src zero)));
20991 ins_cost(100);
20992 effect(TEMP tmp1, TEMP tmp2);
20993 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20994 ins_encode %{
20995 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20996 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
20997 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20998 %}
20999 ins_pipe( pipe_slow );
21000 %}
21001
21002 //----------------------------- CompressBits/ExpandBits ------------------------
21003
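// CompressBits maps to BMI2 pext (gather the src bits selected by mask into
// the low bits of dst) and ExpandBits to pdep (the inverse scatter). For
// example, pext(src = 0b101101, mask = 0b001110) picks src bits 1..3
// (0, 1, 1) and packs them LSB-first, giving 0b110.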
21004 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21005 predicate(n->bottom_type()->isa_int());
21006 match(Set dst (CompressBits src mask));
21007 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21008 ins_encode %{
21009 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21010 %}
21011 ins_pipe( pipe_slow );
21012 %}
21013
21014 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21015 predicate(n->bottom_type()->isa_int());
21016 match(Set dst (ExpandBits src mask));
21017 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21018 ins_encode %{
21019 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21020 %}
21021 ins_pipe( pipe_slow );
21022 %}
21023
21024 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21025 predicate(n->bottom_type()->isa_int());
21026 match(Set dst (CompressBits src (LoadI mask)));
21027 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21028 ins_encode %{
21029 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21030 %}
21031 ins_pipe( pipe_slow );
21032 %}
21033
21034 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21035 predicate(n->bottom_type()->isa_int());
21036 match(Set dst (ExpandBits src (LoadI mask)));
21037 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21038 ins_encode %{
21039 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21040 %}
21041 ins_pipe( pipe_slow );
21042 %}
21043
21044 // --------------------------------- Sqrt --------------------------------------
21045
21046 instruct vsqrtF_reg(vec dst, vec src) %{
21047 match(Set dst (SqrtVF src));
21048 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21049 ins_encode %{
21050 assert(UseAVX > 0, "required");
21051 int vlen_enc = vector_length_encoding(this);
21052 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21053 %}
21054 ins_pipe( pipe_slow );
21055 %}
21056
21057 instruct vsqrtF_mem(vec dst, memory mem) %{
21058 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21059 match(Set dst (SqrtVF (LoadVector mem)));
21060 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21061 ins_encode %{
21062 assert(UseAVX > 0, "required");
21063 int vlen_enc = vector_length_encoding(this);
21064 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21065 %}
21066 ins_pipe( pipe_slow );
21067 %}
21068
21069 // Floating point vector sqrt
21070 instruct vsqrtD_reg(vec dst, vec src) %{
21071 match(Set dst (SqrtVD src));
21072 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21073 ins_encode %{
21074 assert(UseAVX > 0, "required");
21075 int vlen_enc = vector_length_encoding(this);
21076 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21077 %}
21078 ins_pipe( pipe_slow );
21079 %}
21080
21081 instruct vsqrtD_mem(vec dst, memory mem) %{
21082 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21083 match(Set dst (SqrtVD (LoadVector mem)));
21084 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21085 ins_encode %{
21086 assert(UseAVX > 0, "required");
21087 int vlen_enc = vector_length_encoding(this);
21088 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21089 %}
21090 ins_pipe( pipe_slow );
21091 %}
21092
21093 // ------------------------------ Shift ---------------------------------------
21094
// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm register are used for the count).
21097 instruct vshiftcnt(vec dst, rRegI cnt) %{
21098 match(Set dst (LShiftCntV cnt));
21099 match(Set dst (RShiftCntV cnt));
21100 format %{ "movdl $dst,$cnt\t! load shift count" %}
21101 ins_encode %{
21102 __ movdl($dst$$XMMRegister, $cnt$$Register);
21103 %}
21104 ins_pipe( pipe_slow );
21105 %}
21106
21107 // Byte vector shift
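// x86 has no packed byte shift, so the rules below widen bytes to words
// (vextendbw), shift the words, mask each result back to its low byte with
// vector_short_to_byte_mask(), and re-pack with packuswb/vpackuswb. Per lane
// (a sketch of the semantics, not the emitted code):
//   r[i] = (byte)shift_op(sign ? (short)b[i] : (short)(b[i] & 0xFF), count);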
21108 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21109 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21110 match(Set dst ( LShiftVB src shift));
21111 match(Set dst ( RShiftVB src shift));
21112 match(Set dst (URShiftVB src shift));
21113 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21114 format %{"vector_byte_shift $dst,$src,$shift" %}
21115 ins_encode %{
21116 assert(UseSSE > 3, "required");
21117 int opcode = this->ideal_Opcode();
21118 bool sign = (opcode != Op_URShiftVB);
21119 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21120 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21121 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21122 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21123 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21124 %}
21125 ins_pipe( pipe_slow );
21126 %}
21127
21128 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21129 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21130 UseAVX <= 1);
21131 match(Set dst ( LShiftVB src shift));
21132 match(Set dst ( RShiftVB src shift));
21133 match(Set dst (URShiftVB src shift));
21134 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21135 format %{"vector_byte_shift $dst,$src,$shift" %}
21136 ins_encode %{
21137 assert(UseSSE > 3, "required");
21138 int opcode = this->ideal_Opcode();
21139 bool sign = (opcode != Op_URShiftVB);
21140 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21141 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21142 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21143 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21144 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21145 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21146 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21147 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21148 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21149 %}
21150 ins_pipe( pipe_slow );
21151 %}
21152
21153 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21154 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21155 UseAVX > 1);
21156 match(Set dst ( LShiftVB src shift));
21157 match(Set dst ( RShiftVB src shift));
21158 match(Set dst (URShiftVB src shift));
21159 effect(TEMP dst, TEMP tmp);
21160 format %{"vector_byte_shift $dst,$src,$shift" %}
21161 ins_encode %{
21162 int opcode = this->ideal_Opcode();
21163 bool sign = (opcode != Op_URShiftVB);
21164 int vlen_enc = Assembler::AVX_256bit;
21165 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21166 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21167 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21168 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21169 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21170 %}
21171 ins_pipe( pipe_slow );
21172 %}
21173
21174 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21175 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21176 match(Set dst ( LShiftVB src shift));
21177 match(Set dst ( RShiftVB src shift));
21178 match(Set dst (URShiftVB src shift));
21179 effect(TEMP dst, TEMP tmp);
21180 format %{"vector_byte_shift $dst,$src,$shift" %}
21181 ins_encode %{
21182 assert(UseAVX > 1, "required");
21183 int opcode = this->ideal_Opcode();
21184 bool sign = (opcode != Op_URShiftVB);
21185 int vlen_enc = Assembler::AVX_256bit;
21186 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21187 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21188 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21189 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21190 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21191 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21192 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21193 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21194 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21195 %}
21196 ins_pipe( pipe_slow );
21197 %}
21198
21199 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21200 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21201 match(Set dst ( LShiftVB src shift));
21202 match(Set dst (RShiftVB src shift));
21203 match(Set dst (URShiftVB src shift));
21204 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21205 format %{"vector_byte_shift $dst,$src,$shift" %}
21206 ins_encode %{
21207 assert(UseAVX > 2, "required");
21208 int opcode = this->ideal_Opcode();
21209 bool sign = (opcode != Op_URShiftVB);
21210 int vlen_enc = Assembler::AVX_512bit;
21211 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21212 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21213 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21214 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21215 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21216 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21217 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21218 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21219 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21220 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21221 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21222 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21223 %}
21224 ins_pipe( pipe_slow );
21225 %}
21226
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts short values to int with
// sign extension before shifting. Char vectors are fine, though, since
// chars are unsigned values.
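// For example, with short s = (short)0xFFFF (-1), Java evaluates s >>> 1 on
// the sign-extended int 0xFFFFFFFF, giving 0x7FFFFFFF, whose low 16 bits are
// 0xFFFF -- while a 16-bit logical shift would produce 0x7FFF.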
21231 // Shorts/Chars vector left shift
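// In the UseAVX == 0 branches below, legacy SSE shifts are destructive
// two-operand forms, so src is first copied into dst (movflt, movdbl, or
// movdqu, depending on vector width) before the shift is applied.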
21232 instruct vshiftS(vec dst, vec src, vec shift) %{
21233 predicate(!n->as_ShiftV()->is_var_shift());
21234 match(Set dst ( LShiftVS src shift));
21235 match(Set dst ( RShiftVS src shift));
21236 match(Set dst (URShiftVS src shift));
21237 effect(TEMP dst, USE src, USE shift);
21238 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21239 ins_encode %{
21240 int opcode = this->ideal_Opcode();
21241 if (UseAVX > 0) {
21242 int vlen_enc = vector_length_encoding(this);
21243 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21244 } else {
21245 int vlen = Matcher::vector_length(this);
21246 if (vlen == 2) {
21247 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21248 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21249 } else if (vlen == 4) {
21250 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21251 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21252 } else {
        assert(vlen == 8, "sanity");
21254 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21255 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21256 }
21257 }
21258 %}
21259 ins_pipe( pipe_slow );
21260 %}
21261
// Integers vector shift
21263 instruct vshiftI(vec dst, vec src, vec shift) %{
21264 predicate(!n->as_ShiftV()->is_var_shift());
21265 match(Set dst ( LShiftVI src shift));
21266 match(Set dst ( RShiftVI src shift));
21267 match(Set dst (URShiftVI src shift));
21268 effect(TEMP dst, USE src, USE shift);
21269 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21270 ins_encode %{
21271 int opcode = this->ideal_Opcode();
21272 if (UseAVX > 0) {
21273 int vlen_enc = vector_length_encoding(this);
21274 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21275 } else {
21276 int vlen = Matcher::vector_length(this);
21277 if (vlen == 2) {
21278 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21279 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21280 } else {
21281 assert(vlen == 4, "sanity");
21282 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21283 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21284 }
21285 }
21286 %}
21287 ins_pipe( pipe_slow );
21288 %}
21289
// Integers vector constant shift
21291 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21292 match(Set dst (LShiftVI src (LShiftCntV shift)));
21293 match(Set dst (RShiftVI src (RShiftCntV shift)));
21294 match(Set dst (URShiftVI src (RShiftCntV shift)));
21295 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21296 ins_encode %{
21297 int opcode = this->ideal_Opcode();
21298 if (UseAVX > 0) {
21299 int vector_len = vector_length_encoding(this);
21300 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21301 } else {
21302 int vlen = Matcher::vector_length(this);
21303 if (vlen == 2) {
21304 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21305 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21306 } else {
21307 assert(vlen == 4, "sanity");
21308 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21309 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21310 }
21311 }
21312 %}
21313 ins_pipe( pipe_slow );
21314 %}
21315
21316 // Longs vector shift
21317 instruct vshiftL(vec dst, vec src, vec shift) %{
21318 predicate(!n->as_ShiftV()->is_var_shift());
21319 match(Set dst ( LShiftVL src shift));
21320 match(Set dst (URShiftVL src shift));
21321 effect(TEMP dst, USE src, USE shift);
21322 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21323 ins_encode %{
21324 int opcode = this->ideal_Opcode();
21325 if (UseAVX > 0) {
21326 int vlen_enc = vector_length_encoding(this);
21327 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21328 } else {
21329 assert(Matcher::vector_length(this) == 2, "");
21330 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21331 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21332 }
21333 %}
21334 ins_pipe( pipe_slow );
21335 %}
21336
21337 // Longs vector constant shift
21338 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21339 match(Set dst (LShiftVL src (LShiftCntV shift)));
21340 match(Set dst (URShiftVL src (RShiftCntV shift)));
21341 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21342 ins_encode %{
21343 int opcode = this->ideal_Opcode();
21344 if (UseAVX > 0) {
21345 int vector_len = vector_length_encoding(this);
21346 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21347 } else {
21348 assert(Matcher::vector_length(this) == 2, "");
21349 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21350 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21351 }
21352 %}
21353 ins_pipe( pipe_slow );
21354 %}
21355
// ------------------- Arithmetic Right Shift ---------------------------------
21357 // Long vector arithmetic right shift
21358 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21359 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21360 match(Set dst (RShiftVL src shift));
21361 effect(TEMP dst, TEMP tmp);
21362 format %{ "vshiftq $dst,$src,$shift" %}
21363 ins_encode %{
21364 uint vlen = Matcher::vector_length(this);
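    // Neither SSE2 nor AVX2 has a packed 64-bit arithmetic right shift (vpsraq
    // is AVX-512). Emulate it with logical shifts: with m = sign_mask >>> n,
    // ((x >>> n) ^ m) - m re-extends the sign bits the logical shift cleared.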
21365 if (vlen == 2) {
21366 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21367 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21368 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21369 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21370 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21371 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21372 } else {
21373 assert(vlen == 4, "sanity");
21374 assert(UseAVX > 1, "required");
21375 int vlen_enc = Assembler::AVX_256bit;
21376 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21377 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21378 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21379 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21380 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21381 }
21382 %}
21383 ins_pipe( pipe_slow );
21384 %}
21385
21386 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21387 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21388 match(Set dst (RShiftVL src shift));
21389 format %{ "vshiftq $dst,$src,$shift" %}
21390 ins_encode %{
21391 int vlen_enc = vector_length_encoding(this);
21392 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21393 %}
21394 ins_pipe( pipe_slow );
21395 %}
21396
21397 // ------------------- Variable Shift -----------------------------
21398 // Byte variable shift
21399 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21400 predicate(Matcher::vector_length(n) <= 8 &&
21401 n->as_ShiftV()->is_var_shift() &&
21402 !VM_Version::supports_avx512bw());
21403 match(Set dst ( LShiftVB src shift));
21404 match(Set dst ( RShiftVB src shift));
21405 match(Set dst (URShiftVB src shift));
21406 effect(TEMP dst, TEMP vtmp);
21407 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21408 ins_encode %{
21409 assert(UseAVX >= 2, "required");
21410
21411 int opcode = this->ideal_Opcode();
21412 int vlen_enc = Assembler::AVX_128bit;
21413 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21414 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21415 %}
21416 ins_pipe( pipe_slow );
21417 %}
21418
21419 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21420 predicate(Matcher::vector_length(n) == 16 &&
21421 n->as_ShiftV()->is_var_shift() &&
21422 !VM_Version::supports_avx512bw());
21423 match(Set dst ( LShiftVB src shift));
21424 match(Set dst ( RShiftVB src shift));
21425 match(Set dst (URShiftVB src shift));
21426 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21427 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21428 ins_encode %{
21429 assert(UseAVX >= 2, "required");
21430
21431 int opcode = this->ideal_Opcode();
21432 int vlen_enc = Assembler::AVX_128bit;
21433 // Shift lower half and get word result in dst
21434 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21435
21436 // Shift upper half and get word result in vtmp1
21437 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21438 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21439 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21440
21441 // Merge and down convert the two word results to byte in dst
21442 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21443 %}
21444 ins_pipe( pipe_slow );
21445 %}
21446
21447 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21448 predicate(Matcher::vector_length(n) == 32 &&
21449 n->as_ShiftV()->is_var_shift() &&
21450 !VM_Version::supports_avx512bw());
21451 match(Set dst ( LShiftVB src shift));
21452 match(Set dst ( RShiftVB src shift));
21453 match(Set dst (URShiftVB src shift));
21454 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21455 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21456 ins_encode %{
21457 assert(UseAVX >= 2, "required");
21458
21459 int opcode = this->ideal_Opcode();
21460 int vlen_enc = Assembler::AVX_128bit;
21461 // Process lower 128 bits and get result in dst
21462 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21463 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21464 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21465 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21466 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21467
21468 // Process higher 128 bits and get result in vtmp3
21469 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21470 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21471 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21472 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21473 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21474 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21475 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21476
21477 // Merge the two results in dst
21478 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21479 %}
21480 ins_pipe( pipe_slow );
21481 %}
21482
21483 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21484 predicate(Matcher::vector_length(n) <= 32 &&
21485 n->as_ShiftV()->is_var_shift() &&
21486 VM_Version::supports_avx512bw());
21487 match(Set dst ( LShiftVB src shift));
21488 match(Set dst ( RShiftVB src shift));
21489 match(Set dst (URShiftVB src shift));
21490 effect(TEMP dst, TEMP vtmp);
21491 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21492 ins_encode %{
21493 assert(UseAVX > 2, "required");
21494
21495 int opcode = this->ideal_Opcode();
21496 int vlen_enc = vector_length_encoding(this);
21497 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21498 %}
21499 ins_pipe( pipe_slow );
21500 %}
21501
21502 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21503 predicate(Matcher::vector_length(n) == 64 &&
21504 n->as_ShiftV()->is_var_shift() &&
21505 VM_Version::supports_avx512bw());
21506 match(Set dst ( LShiftVB src shift));
21507 match(Set dst ( RShiftVB src shift));
21508 match(Set dst (URShiftVB src shift));
21509 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21510 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21511 ins_encode %{
21512 assert(UseAVX > 2, "required");
21513
21514 int opcode = this->ideal_Opcode();
21515 int vlen_enc = Assembler::AVX_256bit;
21516 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21517 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21518 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21519 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21520 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21521 %}
21522 ins_pipe( pipe_slow );
21523 %}
21524
21525 // Short variable shift
21526 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21527 predicate(Matcher::vector_length(n) <= 8 &&
21528 n->as_ShiftV()->is_var_shift() &&
21529 !VM_Version::supports_avx512bw());
21530 match(Set dst ( LShiftVS src shift));
21531 match(Set dst ( RShiftVS src shift));
21532 match(Set dst (URShiftVS src shift));
21533 effect(TEMP dst, TEMP vtmp);
21534 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21535 ins_encode %{
21536 assert(UseAVX >= 2, "required");
21537
21538 int opcode = this->ideal_Opcode();
21539 bool sign = (opcode != Op_URShiftVS);
21540 int vlen_enc = Assembler::AVX_256bit;
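    // Without AVX512BW there is no variable word shift: extend the shorts and
    // zero-extend the shift counts to ints, do a variable dword shift, then
    // mask to 16 bits and pack back down to shorts.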
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21543 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21544 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21545 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21546 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21547 %}
21548 ins_pipe( pipe_slow );
21549 %}
21550
21551 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21552 predicate(Matcher::vector_length(n) == 16 &&
21553 n->as_ShiftV()->is_var_shift() &&
21554 !VM_Version::supports_avx512bw());
21555 match(Set dst ( LShiftVS src shift));
21556 match(Set dst ( RShiftVS src shift));
21557 match(Set dst (URShiftVS src shift));
21558 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21559 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21560 ins_encode %{
21561 assert(UseAVX >= 2, "required");
21562
21563 int opcode = this->ideal_Opcode();
21564 bool sign = (opcode != Op_URShiftVS);
21565 int vlen_enc = Assembler::AVX_256bit;
21566 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21567 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21568 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21569 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21570 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21571
21572 // Shift upper half, with result in dst using vtmp1 as TEMP
21573 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21574 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21575 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21576 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21577 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21578 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21579
21580 // Merge lower and upper half result into dst
21581 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21582 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21583 %}
21584 ins_pipe( pipe_slow );
21585 %}
21586
21587 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21588 predicate(n->as_ShiftV()->is_var_shift() &&
21589 VM_Version::supports_avx512bw());
21590 match(Set dst ( LShiftVS src shift));
21591 match(Set dst ( RShiftVS src shift));
21592 match(Set dst (URShiftVS src shift));
21593 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21594 ins_encode %{
21595 assert(UseAVX > 2, "required");
21596
21597 int opcode = this->ideal_Opcode();
21598 int vlen_enc = vector_length_encoding(this);
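    // The 128/256-bit EVEX forms of the variable word shift require AVX512VL;
    // without it, run the shift at 512 bits (the upper lanes are don't-care).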
21599 if (!VM_Version::supports_avx512vl()) {
21600 vlen_enc = Assembler::AVX_512bit;
21601 }
21602 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21603 %}
21604 ins_pipe( pipe_slow );
21605 %}
21606
// Integer variable shift
21608 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21609 predicate(n->as_ShiftV()->is_var_shift());
21610 match(Set dst ( LShiftVI src shift));
21611 match(Set dst ( RShiftVI src shift));
21612 match(Set dst (URShiftVI src shift));
21613 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21614 ins_encode %{
21615 assert(UseAVX >= 2, "required");
21616
21617 int opcode = this->ideal_Opcode();
21618 int vlen_enc = vector_length_encoding(this);
21619 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21620 %}
21621 ins_pipe( pipe_slow );
21622 %}
21623
// Long variable shift
21625 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21626 predicate(n->as_ShiftV()->is_var_shift());
21627 match(Set dst ( LShiftVL src shift));
21628 match(Set dst (URShiftVL src shift));
21629 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21630 ins_encode %{
21631 assert(UseAVX >= 2, "required");
21632
21633 int opcode = this->ideal_Opcode();
21634 int vlen_enc = vector_length_encoding(this);
21635 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21636 %}
21637 ins_pipe( pipe_slow );
21638 %}
21639
// Long variable arithmetic right shift
21641 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21642 predicate(Matcher::vector_length(n) <= 4 &&
21643 n->as_ShiftV()->is_var_shift() &&
21644 UseAVX == 2);
21645 match(Set dst (RShiftVL src shift));
21646 effect(TEMP dst, TEMP vtmp);
21647 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21648 ins_encode %{
21649 int opcode = this->ideal_Opcode();
21650 int vlen_enc = vector_length_encoding(this);
21651 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21652 $vtmp$$XMMRegister);
21653 %}
21654 ins_pipe( pipe_slow );
21655 %}
21656
21657 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21658 predicate(n->as_ShiftV()->is_var_shift() &&
21659 UseAVX > 2);
21660 match(Set dst (RShiftVL src shift));
21661 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21662 ins_encode %{
21663 int opcode = this->ideal_Opcode();
21664 int vlen_enc = vector_length_encoding(this);
21665 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21666 %}
21667 ins_pipe( pipe_slow );
21668 %}
21669
21670 // --------------------------------- AND --------------------------------------
21671
21672 instruct vand(vec dst, vec src) %{
21673 predicate(UseAVX == 0);
21674 match(Set dst (AndV dst src));
21675 format %{ "pand $dst,$src\t! and vectors" %}
21676 ins_encode %{
21677 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21678 %}
21679 ins_pipe( pipe_slow );
21680 %}
21681
21682 instruct vand_reg(vec dst, vec src1, vec src2) %{
21683 predicate(UseAVX > 0);
21684 match(Set dst (AndV src1 src2));
21685 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21686 ins_encode %{
21687 int vlen_enc = vector_length_encoding(this);
21688 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21689 %}
21690 ins_pipe( pipe_slow );
21691 %}
21692
21693 instruct vand_mem(vec dst, vec src, memory mem) %{
21694 predicate((UseAVX > 0) &&
21695 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21696 match(Set dst (AndV src (LoadVector mem)));
21697 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21698 ins_encode %{
21699 int vlen_enc = vector_length_encoding(this);
21700 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21701 %}
21702 ins_pipe( pipe_slow );
21703 %}
21704
21705 // --------------------------------- OR ---------------------------------------
21706
21707 instruct vor(vec dst, vec src) %{
21708 predicate(UseAVX == 0);
21709 match(Set dst (OrV dst src));
21710 format %{ "por $dst,$src\t! or vectors" %}
21711 ins_encode %{
21712 __ por($dst$$XMMRegister, $src$$XMMRegister);
21713 %}
21714 ins_pipe( pipe_slow );
21715 %}
21716
21717 instruct vor_reg(vec dst, vec src1, vec src2) %{
21718 predicate(UseAVX > 0);
21719 match(Set dst (OrV src1 src2));
21720 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21721 ins_encode %{
21722 int vlen_enc = vector_length_encoding(this);
21723 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21724 %}
21725 ins_pipe( pipe_slow );
21726 %}
21727
21728 instruct vor_mem(vec dst, vec src, memory mem) %{
21729 predicate((UseAVX > 0) &&
21730 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21731 match(Set dst (OrV src (LoadVector mem)));
21732 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21733 ins_encode %{
21734 int vlen_enc = vector_length_encoding(this);
21735 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21736 %}
21737 ins_pipe( pipe_slow );
21738 %}
21739
21740 // --------------------------------- XOR --------------------------------------
21741
21742 instruct vxor(vec dst, vec src) %{
21743 predicate(UseAVX == 0);
21744 match(Set dst (XorV dst src));
21745 format %{ "pxor $dst,$src\t! xor vectors" %}
21746 ins_encode %{
21747 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21748 %}
21749 ins_pipe( pipe_slow );
21750 %}
21751
21752 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21753 predicate(UseAVX > 0);
21754 match(Set dst (XorV src1 src2));
21755 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21756 ins_encode %{
21757 int vlen_enc = vector_length_encoding(this);
21758 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21759 %}
21760 ins_pipe( pipe_slow );
21761 %}
21762
21763 instruct vxor_mem(vec dst, vec src, memory mem) %{
21764 predicate((UseAVX > 0) &&
21765 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21766 match(Set dst (XorV src (LoadVector mem)));
21767 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21768 ins_encode %{
21769 int vlen_enc = vector_length_encoding(this);
21770 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21771 %}
21772 ins_pipe( pipe_slow );
21773 %}
21774
21775 // --------------------------------- VectorCast --------------------------------------
21776
21777 instruct vcastBtoX(vec dst, vec src) %{
21778 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21779 match(Set dst (VectorCastB2X src));
21780 format %{ "vector_cast_b2x $dst,$src\t!" %}
21781 ins_encode %{
21782 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21783 int vlen_enc = vector_length_encoding(this);
21784 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21785 %}
21786 ins_pipe( pipe_slow );
21787 %}
21788
21789 instruct vcastBtoD(legVec dst, legVec src) %{
21790 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21791 match(Set dst (VectorCastB2X src));
21792 format %{ "vector_cast_b2x $dst,$src\t!" %}
21793 ins_encode %{
21794 int vlen_enc = vector_length_encoding(this);
21795 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21796 %}
21797 ins_pipe( pipe_slow );
21798 %}
21799
21800 instruct castStoX(vec dst, vec src) %{
21801 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21802 Matcher::vector_length(n->in(1)) <= 8 && // src
21803 Matcher::vector_element_basic_type(n) == T_BYTE);
21804 match(Set dst (VectorCastS2X src));
21805 format %{ "vector_cast_s2x $dst,$src" %}
21806 ins_encode %{
21807 assert(UseAVX > 0, "required");
21808
21809 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21810 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21811 %}
21812 ins_pipe( pipe_slow );
21813 %}
21814
21815 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21816 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21817 Matcher::vector_length(n->in(1)) == 16 && // src
21818 Matcher::vector_element_basic_type(n) == T_BYTE);
21819 effect(TEMP dst, TEMP vtmp);
21820 match(Set dst (VectorCastS2X src));
21821 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21822 ins_encode %{
21823 assert(UseAVX > 0, "required");
21824
21825 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21826 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21827 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21828 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21829 %}
21830 ins_pipe( pipe_slow );
21831 %}
21832
21833 instruct vcastStoX_evex(vec dst, vec src) %{
21834 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21835 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21836 match(Set dst (VectorCastS2X src));
21837 format %{ "vector_cast_s2x $dst,$src\t!" %}
21838 ins_encode %{
21839 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21840 int src_vlen_enc = vector_length_encoding(this, $src);
21841 int vlen_enc = vector_length_encoding(this);
21842 switch (to_elem_bt) {
21843 case T_BYTE:
21844 if (!VM_Version::supports_avx512vl()) {
21845 vlen_enc = Assembler::AVX_512bit;
21846 }
21847 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21848 break;
21849 case T_INT:
21850 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21851 break;
21852 case T_FLOAT:
21853 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21854 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21855 break;
21856 case T_LONG:
21857 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21858 break;
21859 case T_DOUBLE: {
21860 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
21861 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
21862 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21863 break;
21864 }
21865 default:
21866 ShouldNotReachHere();
21867 }
21868 %}
21869 ins_pipe( pipe_slow );
21870 %}
21871
21872 instruct castItoX(vec dst, vec src) %{
21873 predicate(UseAVX <= 2 &&
21874 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
21875 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21876 match(Set dst (VectorCastI2X src));
21877 format %{ "vector_cast_i2x $dst,$src" %}
21878 ins_encode %{
21879 assert(UseAVX > 0, "required");
21880
21881 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21882 int vlen_enc = vector_length_encoding(this, $src);
21883
21884 if (to_elem_bt == T_BYTE) {
21885 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21886 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21887 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21888 } else {
21889 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21890 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21891 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21892 }
21893 %}
21894 ins_pipe( pipe_slow );
21895 %}
21896
21897 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
21898 predicate(UseAVX <= 2 &&
21899 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21900 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21901 match(Set dst (VectorCastI2X src));
21902 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21903 effect(TEMP dst, TEMP vtmp);
21904 ins_encode %{
21905 assert(UseAVX > 0, "required");
21906
21907 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21908 int vlen_enc = vector_length_encoding(this, $src);
21909
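    // After vextracti128, lane 0 of $dst holds the upper ints, so the 256-bit
    // vpackusdw($vtmp, $dst) leaves all narrowed elements contiguous in lane 0
    // (the byte case then finishes with a 128-bit vpackuswb).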
21910 if (to_elem_bt == T_BYTE) {
21911 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21912 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21913 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21914 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21915 } else {
21916 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
21917 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21918 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21919 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21920 }
21921 %}
21922 ins_pipe( pipe_slow );
21923 %}
21924
21925 instruct vcastItoX_evex(vec dst, vec src) %{
21926 predicate(UseAVX > 2 ||
21927 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21928 match(Set dst (VectorCastI2X src));
21929 format %{ "vector_cast_i2x $dst,$src\t!" %}
21930 ins_encode %{
21931 assert(UseAVX > 0, "required");
21932
21933 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
21934 int src_vlen_enc = vector_length_encoding(this, $src);
21935 int dst_vlen_enc = vector_length_encoding(this);
21936 switch (dst_elem_bt) {
21937 case T_BYTE:
21938 if (!VM_Version::supports_avx512vl()) {
21939 src_vlen_enc = Assembler::AVX_512bit;
21940 }
21941 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21942 break;
21943 case T_SHORT:
21944 if (!VM_Version::supports_avx512vl()) {
21945 src_vlen_enc = Assembler::AVX_512bit;
21946 }
21947 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21948 break;
21949 case T_FLOAT:
21950 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21951 break;
21952 case T_LONG:
21953 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21954 break;
21955 case T_DOUBLE:
21956 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21957 break;
21958 default:
21959 ShouldNotReachHere();
21960 }
21961 %}
21962 ins_pipe( pipe_slow );
21963 %}
21964
21965 instruct vcastLtoBS(vec dst, vec src) %{
21966 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21967 UseAVX <= 2);
21968 match(Set dst (VectorCastL2X src));
21969 format %{ "vector_cast_l2x $dst,$src" %}
21970 ins_encode %{
21971 assert(UseAVX > 0, "required");
21972
21973 int vlen = Matcher::vector_length_in_bytes(this, $src);
21974 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21975 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21976 : ExternalAddress(vector_int_to_short_mask());
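    // Each long narrows to its low dword first: shuffle immediate 8 (0b1000)
    // gathers dwords {0,2} into the bottom of each permuted lane, after which
    // the masked vpackusdw (and vpackuswb for byte) narrows int to short/byte.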
21977 if (vlen <= 16) {
21978 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21979 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21980 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21981 } else {
21982 assert(vlen <= 32, "required");
21983 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21984 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21985 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21986 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21987 }
21988 if (to_elem_bt == T_BYTE) {
21989 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21990 }
21991 %}
21992 ins_pipe( pipe_slow );
21993 %}
21994
21995 instruct vcastLtoX_evex(vec dst, vec src) %{
21996 predicate(UseAVX > 2 ||
21997 (Matcher::vector_element_basic_type(n) == T_INT ||
21998 Matcher::vector_element_basic_type(n) == T_FLOAT ||
21999 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22000 match(Set dst (VectorCastL2X src));
22001 format %{ "vector_cast_l2x $dst,$src\t!" %}
22002 ins_encode %{
22003 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22004 int vlen = Matcher::vector_length_in_bytes(this, $src);
22005 int vlen_enc = vector_length_encoding(this, $src);
22006 switch (to_elem_bt) {
22007 case T_BYTE:
22008 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22009 vlen_enc = Assembler::AVX_512bit;
22010 }
22011 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22012 break;
22013 case T_SHORT:
22014 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22015 vlen_enc = Assembler::AVX_512bit;
22016 }
22017 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22018 break;
22019 case T_INT:
22020 if (vlen == 8) {
22021 if ($dst$$XMMRegister != $src$$XMMRegister) {
22022 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22023 }
22024 } else if (vlen == 16) {
22025 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22026 } else if (vlen == 32) {
22027 if (UseAVX > 2) {
22028 if (!VM_Version::supports_avx512vl()) {
22029 vlen_enc = Assembler::AVX_512bit;
22030 }
22031 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22032 } else {
22033 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22034 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22035 }
22036 } else { // vlen == 64
22037 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22038 }
22039 break;
22040 case T_FLOAT:
22041 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22042 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22043 break;
22044 case T_DOUBLE:
22045 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22046 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      default: assert(false, "%s", type2name(to_elem_bt));
22050 }
22051 %}
22052 ins_pipe( pipe_slow );
22053 %}
22054
22055 instruct vcastFtoD_reg(vec dst, vec src) %{
22056 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22057 match(Set dst (VectorCastF2X src));
22058 format %{ "vector_cast_f2d $dst,$src\t!" %}
22059 ins_encode %{
22060 int vlen_enc = vector_length_encoding(this);
22061 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22062 %}
22063 ins_pipe( pipe_slow );
22064 %}
22065
22066
22067 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22068 predicate(!VM_Version::supports_avx10_2() &&
22069 !VM_Version::supports_avx512vl() &&
22070 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22071 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22072 is_integral_type(Matcher::vector_element_basic_type(n)));
22073 match(Set dst (VectorCastF2X src));
22074 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22075 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22076 ins_encode %{
22077 int vlen_enc = vector_length_encoding(this, $src);
22078 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits in register-indirect addressing mode: stub
    // constants live in the code cache, and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that limit, but a code cache larger
    // than 2G is unreasonable in practice, and with the cap in place we save a
    // temporary register allocation, which in the limiting case can prevent
    // spilling in blocks with high register pressure.
22086 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22087 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22088 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22089 %}
22090 ins_pipe( pipe_slow );
22091 %}
22092
22093 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22094 predicate(!VM_Version::supports_avx10_2() &&
22095 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22096 is_integral_type(Matcher::vector_element_basic_type(n)));
22097 match(Set dst (VectorCastF2X src));
22098 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22099 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22100 ins_encode %{
22101 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22102 if (to_elem_bt == T_LONG) {
22103 int vlen_enc = vector_length_encoding(this);
22104 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22105 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22106 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22107 } else {
22108 int vlen_enc = vector_length_encoding(this, $src);
22109 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22110 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22111 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22112 }
22113 %}
22114 ins_pipe( pipe_slow );
22115 %}
22116
22117 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22118 predicate(VM_Version::supports_avx10_2() &&
22119 is_integral_type(Matcher::vector_element_basic_type(n)));
22120 match(Set dst (VectorCastF2X src));
22121 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22122 ins_encode %{
22123 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22124 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22125 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22126 %}
22127 ins_pipe( pipe_slow );
22128 %}
22129
22130 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22131 predicate(VM_Version::supports_avx10_2() &&
22132 is_integral_type(Matcher::vector_element_basic_type(n)));
22133 match(Set dst (VectorCastF2X (LoadVector src)));
22134 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22135 ins_encode %{
22136 int vlen = Matcher::vector_length(this);
22137 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22138 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22139 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22140 %}
22141 ins_pipe( pipe_slow );
22142 %}
22143
22144 instruct vcastDtoF_reg(vec dst, vec src) %{
22145 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22146 match(Set dst (VectorCastD2X src));
22147 format %{ "vector_cast_d2x $dst,$src\t!" %}
22148 ins_encode %{
22149 int vlen_enc = vector_length_encoding(this, $src);
22150 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22151 %}
22152 ins_pipe( pipe_slow );
22153 %}
22154
22155 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22156 predicate(!VM_Version::supports_avx10_2() &&
22157 !VM_Version::supports_avx512vl() &&
22158 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22159 is_integral_type(Matcher::vector_element_basic_type(n)));
22160 match(Set dst (VectorCastD2X src));
22161 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22162 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22163 ins_encode %{
22164 int vlen_enc = vector_length_encoding(this, $src);
22165 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22166 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22167 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22168 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22169 %}
22170 ins_pipe( pipe_slow );
22171 %}
22172
22173 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22174 predicate(!VM_Version::supports_avx10_2() &&
22175 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22176 is_integral_type(Matcher::vector_element_basic_type(n)));
22177 match(Set dst (VectorCastD2X src));
22178 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22179 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22180 ins_encode %{
22181 int vlen_enc = vector_length_encoding(this, $src);
22182 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22183 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22184 ExternalAddress(vector_float_signflip());
22185 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22186 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22187 %}
22188 ins_pipe( pipe_slow );
22189 %}
22190
22191 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22192 predicate(VM_Version::supports_avx10_2() &&
22193 is_integral_type(Matcher::vector_element_basic_type(n)));
22194 match(Set dst (VectorCastD2X src));
22195 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22196 ins_encode %{
22197 int vlen_enc = vector_length_encoding(this, $src);
22198 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22199 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22200 %}
22201 ins_pipe( pipe_slow );
22202 %}
22203
22204 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22205 predicate(VM_Version::supports_avx10_2() &&
22206 is_integral_type(Matcher::vector_element_basic_type(n)));
22207 match(Set dst (VectorCastD2X (LoadVector src)));
22208 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22209 ins_encode %{
22210 int vlen = Matcher::vector_length(this);
22211 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22212 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22213 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22214 %}
22215 ins_pipe( pipe_slow );
22216 %}
22217
22218 instruct vucast(vec dst, vec src) %{
22219 match(Set dst (VectorUCastB2X src));
22220 match(Set dst (VectorUCastS2X src));
22221 match(Set dst (VectorUCastI2X src));
22222 format %{ "vector_ucast $dst,$src\t!" %}
22223 ins_encode %{
22224 assert(UseAVX > 0, "required");
22225
22226 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22227 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22228 int vlen_enc = vector_length_encoding(this);
22229 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22230 %}
22231 ins_pipe( pipe_slow );
22232 %}
22233
22234 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22235 predicate(!VM_Version::supports_avx512vl() &&
22236 Matcher::vector_length_in_bytes(n) < 64 &&
22237 Matcher::vector_element_basic_type(n) == T_INT);
22238 match(Set dst (RoundVF src));
22239 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22240 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22241 ins_encode %{
22242 int vlen_enc = vector_length_encoding(this);
22243 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22244 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22245 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22246 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22247 %}
22248 ins_pipe( pipe_slow );
22249 %}
22250
22251 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22252 predicate((VM_Version::supports_avx512vl() ||
22253 Matcher::vector_length_in_bytes(n) == 64) &&
22254 Matcher::vector_element_basic_type(n) == T_INT);
22255 match(Set dst (RoundVF src));
22256 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22257 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22258 ins_encode %{
22259 int vlen_enc = vector_length_encoding(this);
22260 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22261 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22262 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22263 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22264 %}
22265 ins_pipe( pipe_slow );
22266 %}
22267
22268 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22269 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22270 match(Set dst (RoundVD src));
22271 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22272 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22273 ins_encode %{
22274 int vlen_enc = vector_length_encoding(this);
22275 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22276 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22277 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22278 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22279 %}
22280 ins_pipe( pipe_slow );
22281 %}
22282
22283 // --------------------------------- VectorMaskCmp --------------------------------------
22284
22285 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22286 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22287 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22288 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22289 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22290 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22291 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22292 ins_encode %{
22293 int vlen_enc = vector_length_encoding(this, $src1);
22294 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22295 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22296 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22297 } else {
22298 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22299 }
22300 %}
22301 ins_pipe( pipe_slow );
22302 %}
22303
22304 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22305 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22306 n->bottom_type()->isa_vectmask() == nullptr &&
22307 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22308 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22309 effect(TEMP ktmp);
22310 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22311 ins_encode %{
22312 int vlen_enc = Assembler::AVX_512bit;
22313 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22314 KRegister mask = k0; // The comparison itself is not being masked.
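    // Compare into a mask register, then expand it into a boolean vector: the
    // zero-masked load of vector_all_bits_set() (merge == false) writes all-ones
    // lanes where the mask bit is set and zeroes the rest.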
22315 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22316 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22317 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22318 } else {
22319 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22320 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22321 }
22322 %}
22323 ins_pipe( pipe_slow );
22324 %}
22325
22326 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22327 predicate(n->bottom_type()->isa_vectmask() &&
22328 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22329 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22330 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22331 ins_encode %{
22332 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22333 int vlen_enc = vector_length_encoding(this, $src1);
22334 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22335 KRegister mask = k0; // The comparison itself is not being masked.
22336 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22337 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22338 } else {
22339 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22340 }
22341 %}
22342 ins_pipe( pipe_slow );
22343 %}
22344
22345 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22346 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22347 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22348 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22349 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22350 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22351 (n->in(2)->get_int() == BoolTest::eq ||
22352 n->in(2)->get_int() == BoolTest::lt ||
22353 n->in(2)->get_int() == BoolTest::gt)); // cond
22354 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22355 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22356 ins_encode %{
22357 int vlen_enc = vector_length_encoding(this, $src1);
22358 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22359 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22360 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22361 %}
22362 ins_pipe( pipe_slow );
22363 %}
22364
22365 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22366 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22367 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22368 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22369 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22370 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22371 (n->in(2)->get_int() == BoolTest::ne ||
22372 n->in(2)->get_int() == BoolTest::le ||
22373 n->in(2)->get_int() == BoolTest::ge)); // cond
22374 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22375 effect(TEMP dst, TEMP xtmp);
22376 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22377 ins_encode %{
22378 int vlen_enc = vector_length_encoding(this, $src1);
22379 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22380 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22381 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22382 %}
22383 ins_pipe( pipe_slow );
22384 %}
22385
22386 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22387 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22388 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22389 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22390 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22391 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22392 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22393 effect(TEMP dst, TEMP xtmp);
22394 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22395 ins_encode %{
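    // There are no unsigned integer compares before AVX-512: flip the sign bit
    // of both operands so that the unsigned relation becomes the same signed
    // relation on the biased values, then use the signed compare.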
22396 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22397 int vlen_enc = vector_length_encoding(this, $src1);
22398 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22399 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22400
22401 if (vlen_enc == Assembler::AVX_128bit) {
22402 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22403 } else {
22404 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22405 }
22406 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22407 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22408 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22409 %}
22410 ins_pipe( pipe_slow );
22411 %}
22412
22413 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22414 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22415 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22416 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22417 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22418 effect(TEMP ktmp);
22419 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22420 ins_encode %{
22421 assert(UseAVX > 2, "required");
22422
22423 int vlen_enc = vector_length_encoding(this, $src1);
22424 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22425 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22426 KRegister mask = k0; // The comparison itself is not being masked.
22427 bool merge = false;
22428 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22429
22430 switch (src1_elem_bt) {
22431 case T_INT: {
22432 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22433 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22434 break;
22435 }
22436 case T_LONG: {
22437 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22438 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22439 break;
22440 }
22441 default: assert(false, "%s", type2name(src1_elem_bt));
22442 }
22443 %}
22444 ins_pipe( pipe_slow );
22445 %}
22446
22447
22448 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22449 predicate(n->bottom_type()->isa_vectmask() &&
22450 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22451 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22452 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22453 ins_encode %{
22454 assert(UseAVX > 2, "required");
22455 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22456
22457 int vlen_enc = vector_length_encoding(this, $src1);
22458 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22459 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22460 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22461
    // The comparison itself is not being masked (mask register k0).
22463 switch (src1_elem_bt) {
22464 case T_BYTE: {
22465 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22466 break;
22467 }
22468 case T_SHORT: {
22469 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22470 break;
22471 }
22472 case T_INT: {
22473 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22474 break;
22475 }
22476 case T_LONG: {
22477 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22478 break;
22479 }
22480 default: assert(false, "%s", type2name(src1_elem_bt));
22481 }
22482 %}
22483 ins_pipe( pipe_slow );
22484 %}
22485
22486 // Extract
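// Extract a single lane of a vector into a scalar register. For vectors of at
// most 128 bits the element is read directly from the source; for 256/512-bit
// vectors the 128-bit lane containing the element is first moved into a vector
// TEMP (get_lane) and the element is then read from there (get_elem).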
22487
22488 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22489 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22490 match(Set dst (ExtractI src idx));
22491 match(Set dst (ExtractS src idx));
22492 match(Set dst (ExtractB src idx));
22493 format %{ "extractI $dst,$src,$idx\t!" %}
22494 ins_encode %{
22495 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22496
22497 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22498 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22499 %}
22500 ins_pipe( pipe_slow );
22501 %}
22502
22503 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22504 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22505 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22506 match(Set dst (ExtractI src idx));
22507 match(Set dst (ExtractS src idx));
22508 match(Set dst (ExtractB src idx));
22509 effect(TEMP vtmp);
22510 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22511 ins_encode %{
22512 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22513
22514 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22515 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22516 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22517 %}
22518 ins_pipe( pipe_slow );
22519 %}
22520
22521 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22522 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22523 match(Set dst (ExtractL src idx));
22524 format %{ "extractL $dst,$src,$idx\t!" %}
22525 ins_encode %{
22526 assert(UseSSE >= 4, "required");
22527 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22528
22529 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22530 %}
22531 ins_pipe( pipe_slow );
22532 %}
22533
22534 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22535 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22536 Matcher::vector_length(n->in(1)) == 8); // src
22537 match(Set dst (ExtractL src idx));
22538 effect(TEMP vtmp);
22539 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22540 ins_encode %{
22541 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22542
22543 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22544 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22545 %}
22546 ins_pipe( pipe_slow );
22547 %}
22548
22549 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22550 predicate(Matcher::vector_length(n->in(1)) <= 4);
22551 match(Set dst (ExtractF src idx));
22552 effect(TEMP dst, TEMP vtmp);
22553 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22554 ins_encode %{
22555 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22556
22557 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22558 %}
22559 ins_pipe( pipe_slow );
22560 %}
22561
22562 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22563 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22564 Matcher::vector_length(n->in(1)/*src*/) == 16);
22565 match(Set dst (ExtractF src idx));
22566 effect(TEMP vtmp);
22567 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22568 ins_encode %{
22569 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22570
22571 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22572 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22573 %}
22574 ins_pipe( pipe_slow );
22575 %}
22576
22577 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22578 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22579 match(Set dst (ExtractD src idx));
22580 format %{ "extractD $dst,$src,$idx\t!" %}
22581 ins_encode %{
22582 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22583
22584 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22585 %}
22586 ins_pipe( pipe_slow );
22587 %}
22588
22589 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22590 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22591 Matcher::vector_length(n->in(1)) == 8); // src
22592 match(Set dst (ExtractD src idx));
22593 effect(TEMP vtmp);
22594 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22595 ins_encode %{
22596 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22597
22598 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22599 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22600 %}
22601 ins_pipe( pipe_slow );
22602 %}
22603
22604 // --------------------------------- Vector Blend --------------------------------------
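// Selects between two vectors lane by lane under a mask:
//   dst[i] = mask[i] ? src2[i] : src1[i]
// Pre-AVX code relies on pblendvb and its implicit xmm0 mask operand, AVX uses
// vpblendvb/vblendvps, and AVX-512 blends directly under an opmask register.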
22605
22606 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22607 predicate(UseAVX == 0);
22608 match(Set dst (VectorBlend (Binary dst src) mask));
22609 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22610 effect(TEMP tmp);
22611 ins_encode %{
22612 assert(UseSSE >= 4, "required");
22613
22614 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22615 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22616 }
22617 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22618 %}
22619 ins_pipe( pipe_slow );
22620 %}
22621
22622 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22623 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22624 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22625 Matcher::vector_length_in_bytes(n) <= 32 &&
22626 is_integral_type(Matcher::vector_element_basic_type(n)));
22627 match(Set dst (VectorBlend (Binary src1 src2) mask));
22628 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22629 ins_encode %{
22630 int vlen_enc = vector_length_encoding(this);
22631 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22632 %}
22633 ins_pipe( pipe_slow );
22634 %}
22635
22636 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22637 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22638 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22639 Matcher::vector_length_in_bytes(n) <= 32 &&
22640 !is_integral_type(Matcher::vector_element_basic_type(n)));
22641 match(Set dst (VectorBlend (Binary src1 src2) mask));
22642 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22643 ins_encode %{
22644 int vlen_enc = vector_length_encoding(this);
22645 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22646 %}
22647 ins_pipe( pipe_slow );
22648 %}
22649
22650 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22651 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22652 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22653 Matcher::vector_length_in_bytes(n) <= 32);
22654 match(Set dst (VectorBlend (Binary src1 src2) mask));
22655 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22656 effect(TEMP vtmp, TEMP dst);
22657 ins_encode %{
22658 int vlen_enc = vector_length_encoding(this);
    // Emulate the blend: dst = (mask & src2) | (~mask & src1)
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22662 %}
22663 ins_pipe( pipe_slow );
22664 %}
22665
22666 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22667 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22668 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22669 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22671 effect(TEMP ktmp);
22672 ins_encode %{
22673 int vlen_enc = Assembler::AVX_512bit;
22674 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22675 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22676 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22677 %}
22678 ins_pipe( pipe_slow );
22679 %}
22680
22681
22682 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22683 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22684 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22685 VM_Version::supports_avx512bw()));
22686 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22688 ins_encode %{
22689 int vlen_enc = vector_length_encoding(this);
22690 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22691 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22692 %}
22693 ins_pipe( pipe_slow );
22694 %}
22695
22696 // --------------------------------- ABS --------------------------------------
22697 // a = |a|
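// Byte/short/int abs use the SSSE3 pabs* forms for 128-bit vectors and the VEX
// vpabs* forms for wider ones; long abs (vpabsq) only exists as an EVEX encoding.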
22698 instruct vabsB_reg(vec dst, vec src) %{
22699 match(Set dst (AbsVB src));
22700 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22701 ins_encode %{
22702 uint vlen = Matcher::vector_length(this);
22703 if (vlen <= 16) {
22704 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22705 } else {
22706 int vlen_enc = vector_length_encoding(this);
22707 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22708 }
22709 %}
22710 ins_pipe( pipe_slow );
22711 %}
22712
22713 instruct vabsS_reg(vec dst, vec src) %{
22714 match(Set dst (AbsVS src));
22715 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22716 ins_encode %{
22717 uint vlen = Matcher::vector_length(this);
22718 if (vlen <= 8) {
22719 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22720 } else {
22721 int vlen_enc = vector_length_encoding(this);
22722 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22723 }
22724 %}
22725 ins_pipe( pipe_slow );
22726 %}
22727
22728 instruct vabsI_reg(vec dst, vec src) %{
22729 match(Set dst (AbsVI src));
22730 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22731 ins_encode %{
22732 uint vlen = Matcher::vector_length(this);
22733 if (vlen <= 4) {
22734 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22735 } else {
22736 int vlen_enc = vector_length_encoding(this);
22737 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22738 }
22739 %}
22740 ins_pipe( pipe_slow );
22741 %}
22742
22743 instruct vabsL_reg(vec dst, vec src) %{
22744 match(Set dst (AbsVL src));
22745 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22746 ins_encode %{
22747 assert(UseAVX > 2, "required");
22748 int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      // Without AVX512VL there are no 128/256-bit EVEX encodings; use the 512-bit form.
      vlen_enc = Assembler::AVX_512bit;
    }
22752 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22753 %}
22754 ins_pipe( pipe_slow );
22755 %}
22756
22757 // --------------------------------- ABSNEG --------------------------------------
22758
22759 instruct vabsnegF(vec dst, vec src) %{
22760 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22761 match(Set dst (AbsVF src));
22762 match(Set dst (NegVF src));
22763 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22764 ins_cost(150);
22765 ins_encode %{
22766 int opcode = this->ideal_Opcode();
22767 int vlen = Matcher::vector_length(this);
22768 if (vlen == 2) {
22769 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22770 } else {
22771 assert(vlen == 8 || vlen == 16, "required");
22772 int vlen_enc = vector_length_encoding(this);
22773 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22774 }
22775 %}
22776 ins_pipe( pipe_slow );
22777 %}
22778
22779 instruct vabsneg4F(vec dst) %{
22780 predicate(Matcher::vector_length(n) == 4);
22781 match(Set dst (AbsVF dst));
22782 match(Set dst (NegVF dst));
22783 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22784 ins_cost(150);
22785 ins_encode %{
22786 int opcode = this->ideal_Opcode();
22787 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22788 %}
22789 ins_pipe( pipe_slow );
22790 %}
22791
22792 instruct vabsnegD(vec dst, vec src) %{
22793 match(Set dst (AbsVD src));
22794 match(Set dst (NegVD src));
22795 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22796 ins_encode %{
22797 int opcode = this->ideal_Opcode();
22798 uint vlen = Matcher::vector_length(this);
22799 if (vlen == 2) {
22800 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22801 } else {
22802 int vlen_enc = vector_length_encoding(this);
22803 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22804 }
22805 %}
22806 ins_pipe( pipe_slow );
22807 %}
22808
22809 //------------------------------------- VectorTest --------------------------------------------
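// Sets the condition flags from a boolean/mask vector so the matcher can branch
// on "all lanes true" (BoolTest::overflow) or "any lane true" (BoolTest::ne),
// as encoded in the predicates below.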
22810
22811 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22812 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22813 match(Set cr (VectorTest src1 src2));
22814 effect(TEMP vtmp);
22815 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22816 ins_encode %{
22817 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22818 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22819 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22820 %}
22821 ins_pipe( pipe_slow );
22822 %}
22823
22824 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22825 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22826 match(Set cr (VectorTest src1 src2));
22827 format %{ "vptest_ge16 $src1, $src2\n\t" %}
22828 ins_encode %{
22829 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22830 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22831 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22832 %}
22833 ins_pipe( pipe_slow );
22834 %}
22835
22836 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22837 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22838 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22839 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22840 match(Set cr (VectorTest src1 src2));
22841 effect(TEMP tmp);
22842 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22843 ins_encode %{
22844 uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);  // keep only the bits of the in-range lanes
    __ cmpl($tmp$$Register, (1 << masklen) - 1);  // ZF is set iff every lane is true
22848 %}
22849 ins_pipe( pipe_slow );
22850 %}
22851
22852 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22853 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22854 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22855 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22856 match(Set cr (VectorTest src1 src2));
22857 effect(TEMP tmp);
22858 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22859 ins_encode %{
22860 uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);  // ZF is set iff no lane is true
22863 %}
22864 ins_pipe( pipe_slow );
22865 %}
22866
22867 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
22868 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22869 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22870 match(Set cr (VectorTest src1 src2));
22871 format %{ "ktest_ge8 $src1, $src2\n\t" %}
22872 ins_encode %{
22873 uint masklen = Matcher::vector_length(this, $src1);
22874 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22875 %}
22876 ins_pipe( pipe_slow );
22877 %}
22878
22879 //------------------------------------- LoadMask --------------------------------------------
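// Converts a boolean vector (one 0/1 byte per lane) into the internal mask
// representation: either a vector whose true lanes have all bits set, or an
// opmask (k) register on AVX-512 targets.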
22880
22881 instruct loadMask(legVec dst, legVec src) %{
22882 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22883 match(Set dst (VectorLoadMask src));
22884 effect(TEMP dst);
22885 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22886 ins_encode %{
22887 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22888 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22889 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22890 %}
22891 ins_pipe( pipe_slow );
22892 %}
22893
22894 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
22895 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
22896 match(Set dst (VectorLoadMask src));
22897 effect(TEMP xtmp);
22898 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22899 ins_encode %{
22900 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22901 true, Assembler::AVX_512bit);
22902 %}
22903 ins_pipe( pipe_slow );
22904 %}
22905
22906 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
22907 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
22908 match(Set dst (VectorLoadMask src));
22909 effect(TEMP xtmp);
22910 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22911 ins_encode %{
22912 int vlen_enc = vector_length_encoding(in(1));
22913 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22914 false, vlen_enc);
22915 %}
22916 ins_pipe( pipe_slow );
22917 %}
22918
22919 //------------------------------------- StoreMask --------------------------------------------
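// The inverse of LoadMask: narrows a mask whose elements are $size bytes wide
// back into a boolean vector with one byte per lane; the trailing pabsb/vpabsb
// turns all-ones lanes into 1.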
22920
22921 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
22922 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22923 match(Set dst (VectorStoreMask src size));
22924 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22925 ins_encode %{
22926 int vlen = Matcher::vector_length(this);
22927 if (vlen <= 16 && UseAVX <= 2) {
22928 assert(UseSSE >= 3, "required");
22929 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22930 } else {
22931 assert(UseAVX > 0, "required");
22932 int src_vlen_enc = vector_length_encoding(this, $src);
22933 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22934 }
22935 %}
22936 ins_pipe( pipe_slow );
22937 %}
22938
22939 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
22940 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22941 match(Set dst (VectorStoreMask src size));
22942 effect(TEMP_DEF dst, TEMP xtmp);
22943 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22944 ins_encode %{
22945 int vlen_enc = Assembler::AVX_128bit;
22946 int vlen = Matcher::vector_length(this);
22947 if (vlen <= 8) {
22948 assert(UseSSE >= 3, "required");
22949 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22950 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22951 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22952 } else {
22953 assert(UseAVX > 0, "required");
22954 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22955 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22956 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22957 }
22958 %}
22959 ins_pipe( pipe_slow );
22960 %}
22961
22962 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
22963 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
22964 match(Set dst (VectorStoreMask src size));
22965 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22966 effect(TEMP_DEF dst, TEMP xtmp);
22967 ins_encode %{
22968 int vlen_enc = Assembler::AVX_128bit;
22969 int vlen = Matcher::vector_length(this);
22970 if (vlen <= 4) {
22971 assert(UseSSE >= 3, "required");
22972 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22973 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22974 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22975 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22976 } else {
22977 assert(UseAVX > 0, "required");
22978 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22979 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22980 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22981 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22982 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22983 }
22984 %}
22985 ins_pipe( pipe_slow );
22986 %}
22987
22988 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
22989 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22990 match(Set dst (VectorStoreMask src size));
22991 effect(TEMP_DEF dst, TEMP xtmp);
22992 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22993 ins_encode %{
22994 assert(UseSSE >= 3, "required");
22995 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22996 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22997 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
22998 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22999 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23000 %}
23001 ins_pipe( pipe_slow );
23002 %}
23003
23004 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23005 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23006 match(Set dst (VectorStoreMask src size));
23007 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23008 effect(TEMP_DEF dst, TEMP vtmp);
23009 ins_encode %{
23010 int vlen_enc = Assembler::AVX_128bit;
23011 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23012 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23013 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23014 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23015 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23016 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23017 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23018 %}
23019 ins_pipe( pipe_slow );
23020 %}
23021
23022 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23023 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23024 match(Set dst (VectorStoreMask src size));
23025 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23026 ins_encode %{
23027 int src_vlen_enc = vector_length_encoding(this, $src);
23028 int dst_vlen_enc = vector_length_encoding(this);
23029 if (!VM_Version::supports_avx512vl()) {
23030 src_vlen_enc = Assembler::AVX_512bit;
23031 }
23032 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23033 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23034 %}
23035 ins_pipe( pipe_slow );
23036 %}
23037
23038 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23039 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23040 match(Set dst (VectorStoreMask src size));
23041 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23042 ins_encode %{
23043 int src_vlen_enc = vector_length_encoding(this, $src);
23044 int dst_vlen_enc = vector_length_encoding(this);
23045 if (!VM_Version::supports_avx512vl()) {
23046 src_vlen_enc = Assembler::AVX_512bit;
23047 }
23048 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23049 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23050 %}
23051 ins_pipe( pipe_slow );
23052 %}
23053
23054 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23055 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23056 match(Set dst (VectorStoreMask mask size));
23057 effect(TEMP_DEF dst);
23058 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23059 ins_encode %{
23060 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23061 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23062 false, Assembler::AVX_512bit, noreg);
23063 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23064 %}
23065 ins_pipe( pipe_slow );
23066 %}
23067
23068 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23069 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23070 match(Set dst (VectorStoreMask mask size));
23071 effect(TEMP_DEF dst);
23072 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23073 ins_encode %{
23074 int dst_vlen_enc = vector_length_encoding(this);
23075 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23076 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23077 %}
23078 ins_pipe( pipe_slow );
23079 %}
23080
23081 instruct vmaskcast_evex(kReg dst) %{
23082 match(Set dst (VectorMaskCast dst));
23083 ins_cost(0);
23084 format %{ "vector_mask_cast $dst" %}
23085 ins_encode %{
23086 // empty
23087 %}
23088 ins_pipe(empty);
23089 %}
23090
23091 instruct vmaskcast(vec dst) %{
23092 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23093 match(Set dst (VectorMaskCast dst));
23094 ins_cost(0);
23095 format %{ "vector_mask_cast $dst" %}
23096 ins_encode %{
23097 // empty
23098 %}
23099 ins_pipe(empty);
23100 %}
23101
23102 instruct vmaskcast_avx(vec dst, vec src) %{
23103 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23104 match(Set dst (VectorMaskCast src));
23105 format %{ "vector_mask_cast $dst, $src" %}
23106 ins_encode %{
23107 int vlen = Matcher::vector_length(this);
23108 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23109 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23110 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23111 %}
23112 ins_pipe(pipe_slow);
23113 %}
23114
23115 //-------------------------------- Load Iota Indices ----------------------------------
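// Loads the constant index vector {0, 1, 2, ..., n-1} from a per-type constant
// table; e.g. an 8-element int vector receives {0,1,2,3,4,5,6,7}. PopulateIndex
// below broadcasts a start value and adds it to these indices.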
23116
23117 instruct loadIotaIndices(vec dst, immI_0 src) %{
23118 match(Set dst (VectorLoadConst src));
23119 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23120 ins_encode %{
23121 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23122 BasicType bt = Matcher::vector_element_basic_type(this);
23123 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23124 %}
23125 ins_pipe( pipe_slow );
23126 %}
23127
23128 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23129 match(Set dst (PopulateIndex src1 src2));
23130 effect(TEMP dst, TEMP vtmp);
23131 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23132 ins_encode %{
23133 assert($src2$$constant == 1, "required");
23134 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23135 int vlen_enc = vector_length_encoding(this);
23136 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23137 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23138 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23139 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23140 %}
23141 ins_pipe( pipe_slow );
23142 %}
23143
23144 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23145 match(Set dst (PopulateIndex src1 src2));
23146 effect(TEMP dst, TEMP vtmp);
23147 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23148 ins_encode %{
23149 assert($src2$$constant == 1, "required");
23150 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23151 int vlen_enc = vector_length_encoding(this);
23152 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23153 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23154 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23155 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23156 %}
23157 ins_pipe( pipe_slow );
23158 %}
23159
23160 //-------------------------------- Rearrange ----------------------------------
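// Rearrange permutes the source lanes through an index vector:
//   dst[i] = src[shuffle[i]]
// The LoadShuffle rules prepare the index vector in whatever granularity the
// available shuffle instruction operates on.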
23161
23162 // LoadShuffle/Rearrange for Byte
23163 instruct rearrangeB(vec dst, vec shuffle) %{
23164 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23165 Matcher::vector_length(n) < 32);
23166 match(Set dst (VectorRearrange dst shuffle));
23167 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23168 ins_encode %{
23169 assert(UseSSE >= 4, "required");
23170 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23171 %}
23172 ins_pipe( pipe_slow );
23173 %}
23174
23175 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23176 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23177 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23178 match(Set dst (VectorRearrange src shuffle));
23179 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23180 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23181 ins_encode %{
23182 assert(UseAVX >= 2, "required");
23183 // Swap src into vtmp1
23184 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
23186 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
23188 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23190 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23191 // Perform the blend
23192 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23193 %}
23194 ins_pipe( pipe_slow );
23195 %}
23196
23197
23198 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23199 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23200 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23201 match(Set dst (VectorRearrange src shuffle));
23202 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23204 ins_encode %{
23205 int vlen_enc = vector_length_encoding(this);
23206 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23207 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23208 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23209 %}
23210 ins_pipe( pipe_slow );
23211 %}
23212
23213 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23214 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23215 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23216 match(Set dst (VectorRearrange src shuffle));
23217 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23218 ins_encode %{
23219 int vlen_enc = vector_length_encoding(this);
23220 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23221 %}
23222 ins_pipe( pipe_slow );
23223 %}
23224
23225 // LoadShuffle/Rearrange for Short
23226
23227 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23228 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23229 !VM_Version::supports_avx512bw());
23230 match(Set dst (VectorLoadShuffle src));
23231 effect(TEMP dst, TEMP vtmp);
23232 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23233 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
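    // Each short index s becomes the byte pair (2s, 2s+1): for example a
    // shuffle of {3, 0, ...} turns into the byte mask {6,7, 0,1, ...}.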
23236 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23237 if (UseAVX == 0) {
23238 assert(vlen_in_bytes <= 16, "required");
23239 // Multiply each shuffle by two to get byte index
23240 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23241 __ psllw($vtmp$$XMMRegister, 1);
23242
23243 // Duplicate to create 2 copies of byte index
23244 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23245 __ psllw($dst$$XMMRegister, 8);
23246 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23247
23248 // Add one to get alternate byte index
23249 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23250 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23251 } else {
23252 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23253 int vlen_enc = vector_length_encoding(this);
23254 // Multiply each shuffle by two to get byte index
23255 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23256
23257 // Duplicate to create 2 copies of byte index
23258 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23259 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23260
23261 // Add one to get alternate byte index
23262 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23263 }
23264 %}
23265 ins_pipe( pipe_slow );
23266 %}
23267
23268 instruct rearrangeS(vec dst, vec shuffle) %{
23269 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23270 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23271 match(Set dst (VectorRearrange dst shuffle));
23272 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23273 ins_encode %{
23274 assert(UseSSE >= 4, "required");
23275 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23276 %}
23277 ins_pipe( pipe_slow );
23278 %}
23279
23280 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23281 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23282 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23283 match(Set dst (VectorRearrange src shuffle));
23284 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23285 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23286 ins_encode %{
23287 assert(UseAVX >= 2, "required");
23288 // Swap src into vtmp1
23289 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
23291 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
23293 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting the high bit for entries that come from the other lane
23295 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23296 // Perform the blend
23297 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23298 %}
23299 ins_pipe( pipe_slow );
23300 %}
23301
23302 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23303 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23304 VM_Version::supports_avx512bw());
23305 match(Set dst (VectorRearrange src shuffle));
23306 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23307 ins_encode %{
23308 int vlen_enc = vector_length_encoding(this);
23309 if (!VM_Version::supports_avx512vl()) {
23310 vlen_enc = Assembler::AVX_512bit;
23311 }
23312 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23313 %}
23314 ins_pipe( pipe_slow );
23315 %}
23316
23317 // LoadShuffle/Rearrange for Integer and Float
23318
23319 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23320 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23321 Matcher::vector_length(n) == 4 && UseAVX == 0);
23322 match(Set dst (VectorLoadShuffle src));
23323 effect(TEMP dst, TEMP vtmp);
23324 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23325 ins_encode %{
23326 assert(UseSSE >= 4, "required");
23327
    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
23330
23331 // Duplicate and multiply each shuffle by 4
23332 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23333 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23334 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23335 __ psllw($vtmp$$XMMRegister, 2);
23336
23337 // Duplicate again to create 4 copies of byte index
23338 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23339 __ psllw($dst$$XMMRegister, 8);
23340 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23341
23342 // Add 3,2,1,0 to get alternate byte index
23343 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23344 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23345 %}
23346 ins_pipe( pipe_slow );
23347 %}
23348
23349 instruct rearrangeI(vec dst, vec shuffle) %{
23350 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23351 UseAVX == 0);
23352 match(Set dst (VectorRearrange dst shuffle));
23353 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23354 ins_encode %{
23355 assert(UseSSE >= 4, "required");
23356 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23357 %}
23358 ins_pipe( pipe_slow );
23359 %}
23360
23361 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23362 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23363 UseAVX > 0);
23364 match(Set dst (VectorRearrange src shuffle));
23365 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23366 ins_encode %{
23367 int vlen_enc = vector_length_encoding(this);
23368 BasicType bt = Matcher::vector_element_basic_type(this);
23369 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23370 %}
23371 ins_pipe( pipe_slow );
23372 %}
23373
23374 // LoadShuffle/Rearrange for Long and Double
23375
23376 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23377 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23378 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23379 match(Set dst (VectorLoadShuffle src));
23380 effect(TEMP dst, TEMP vtmp);
23381 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23382 ins_encode %{
23383 assert(UseAVX >= 2, "required");
23384
23385 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
23388
23389 // Multiply each shuffle by two to get double word index
23390 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23391
23392 // Duplicate each double word shuffle
23393 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23394 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23395
23396 // Add one to get alternate double word index
23397 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23398 %}
23399 ins_pipe( pipe_slow );
23400 %}
23401
23402 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23403 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23404 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23405 match(Set dst (VectorRearrange src shuffle));
23406 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23407 ins_encode %{
23408 assert(UseAVX >= 2, "required");
23409
23410 int vlen_enc = vector_length_encoding(this);
23411 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23412 %}
23413 ins_pipe( pipe_slow );
23414 %}
23415
23416 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23417 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23418 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23419 match(Set dst (VectorRearrange src shuffle));
23420 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23421 ins_encode %{
23422 assert(UseAVX > 2, "required");
23423
23424 int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      // vpermq with a vector index has no 128-bit form; use the 256-bit encoding.
      vlen_enc = Assembler::AVX_256bit;
    }
23428 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23429 %}
23430 ins_pipe( pipe_slow );
23431 %}
23432
23433 // --------------------------------- FMA --------------------------------------
23434 // a * b + c
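// The multiply and add are fused with a single rounding step, matching the
// semantics of Math.fma (hence the UseFMA guards in the encodings below).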
23435
23436 instruct vfmaF_reg(vec a, vec b, vec c) %{
23437 match(Set c (FmaVF c (Binary a b)));
23438 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23439 ins_cost(150);
23440 ins_encode %{
23441 assert(UseFMA, "not enabled");
23442 int vlen_enc = vector_length_encoding(this);
23443 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23444 %}
23445 ins_pipe( pipe_slow );
23446 %}
23447
23448 instruct vfmaF_mem(vec a, memory b, vec c) %{
23449 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23450 match(Set c (FmaVF c (Binary a (LoadVector b))));
23451 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23452 ins_cost(150);
23453 ins_encode %{
23454 assert(UseFMA, "not enabled");
23455 int vlen_enc = vector_length_encoding(this);
23456 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23457 %}
23458 ins_pipe( pipe_slow );
23459 %}
23460
23461 instruct vfmaD_reg(vec a, vec b, vec c) %{
23462 match(Set c (FmaVD c (Binary a b)));
23463 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23464 ins_cost(150);
23465 ins_encode %{
23466 assert(UseFMA, "not enabled");
23467 int vlen_enc = vector_length_encoding(this);
23468 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23469 %}
23470 ins_pipe( pipe_slow );
23471 %}
23472
23473 instruct vfmaD_mem(vec a, memory b, vec c) %{
23474 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23475 match(Set c (FmaVD c (Binary a (LoadVector b))));
23476 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23477 ins_cost(150);
23478 ins_encode %{
23479 assert(UseFMA, "not enabled");
23480 int vlen_enc = vector_length_encoding(this);
23481 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23482 %}
23483 ins_pipe( pipe_slow );
23484 %}
23485
23486 // --------------------------------- Vector Multiply Add --------------------------------------
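// pmaddwd multiplies adjacent pairs of 16-bit elements and sums each pair into
// a 32-bit lane: dst[i] = src1[2i]*src2[2i] + src1[2i+1]*src2[2i+1].
// E.g. shorts {1,2,3,4} * {5,6,7,8} produce ints {1*5 + 2*6, 3*7 + 4*8} = {17, 53}.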
23487
23488 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23489 predicate(UseAVX == 0);
23490 match(Set dst (MulAddVS2VI dst src1));
23491 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23492 ins_encode %{
23493 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23494 %}
23495 ins_pipe( pipe_slow );
23496 %}
23497
23498 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23499 predicate(UseAVX > 0);
23500 match(Set dst (MulAddVS2VI src1 src2));
23501 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23502 ins_encode %{
23503 int vlen_enc = vector_length_encoding(this);
23504 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23505 %}
23506 ins_pipe( pipe_slow );
23507 %}
23508
23509 // --------------------------------- Vector Multiply Add Add ----------------------------------
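// With AVX-512 VNNI the multiply-add above fuses with the accumulating add:
// evpdpwssd computes dst[i] += src1[2i]*src2[2i] + src1[2i+1]*src2[2i+1] in a
// single instruction, which is why the rule below matches AddVI of MulAddVS2VI.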
23510
23511 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23512 predicate(VM_Version::supports_avx512_vnni());
23513 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23514 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23515 ins_encode %{
23516 assert(UseAVX > 2, "required");
23517 int vlen_enc = vector_length_encoding(this);
23518 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23519 %}
23520 ins_pipe( pipe_slow );
23521 ins_cost(10);
23522 %}
23523
23524 // --------------------------------- PopCount --------------------------------------
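// Counts the set bits in every lane (vectorized Integer/Long.bitCount). Targets
// with the AVX-512 popcount extensions use the native instructions; other
// targets fall back to an emulation sequence in the macro assembler.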
23525
23526 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23527 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23528 match(Set dst (PopCountVI src));
23529 match(Set dst (PopCountVL src));
23530 format %{ "vector_popcount_integral $dst, $src" %}
23531 ins_encode %{
23532 int opcode = this->ideal_Opcode();
23533 int vlen_enc = vector_length_encoding(this, $src);
23534 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23535 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23536 %}
23537 ins_pipe( pipe_slow );
23538 %}
23539
23540 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23541 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23542 match(Set dst (PopCountVI src mask));
23543 match(Set dst (PopCountVL src mask));
23544 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23545 ins_encode %{
23546 int vlen_enc = vector_length_encoding(this, $src);
23547 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23548 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23549 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23550 %}
23551 ins_pipe( pipe_slow );
23552 %}
23553
23554 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23555 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23556 match(Set dst (PopCountVI src));
23557 match(Set dst (PopCountVL src));
23558 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23559 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23560 ins_encode %{
23561 int opcode = this->ideal_Opcode();
23562 int vlen_enc = vector_length_encoding(this, $src);
23563 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23564 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23565 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23566 %}
23567 ins_pipe( pipe_slow );
23568 %}
23569
23570 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
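// Computes the number of trailing zero bits in every lane (vectorized
// Integer/Long.numberOfTrailingZeros); on EVEX targets it is derived from the
// leading-zero count of the isolated lowest set bit, hence the AVX512CD
// requirements in the predicates below.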
23571
23572 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23573 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23574 Matcher::vector_length_in_bytes(n->in(1))));
23575 match(Set dst (CountTrailingZerosV src));
23576 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23577 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23579 ins_encode %{
23580 int vlen_enc = vector_length_encoding(this, $src);
23581 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23582 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23583 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23584 %}
23585 ins_pipe( pipe_slow );
23586 %}
23587
23588 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23589 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23590 VM_Version::supports_avx512cd() &&
23591 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23592 match(Set dst (CountTrailingZerosV src));
23593 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23594 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23596 ins_encode %{
23597 int vlen_enc = vector_length_encoding(this, $src);
23598 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23599 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23600 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23601 %}
23602 ins_pipe( pipe_slow );
23603 %}
23604
23605 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23606 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23607 match(Set dst (CountTrailingZerosV src));
23608 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23609 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23611 ins_encode %{
23612 int vlen_enc = vector_length_encoding(this, $src);
23613 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23614 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23615 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23616 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23617 %}
23618 ins_pipe( pipe_slow );
23619 %}
23620
23621 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23622 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23623 match(Set dst (CountTrailingZerosV src));
23624 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23625 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23626 ins_encode %{
23627 int vlen_enc = vector_length_encoding(this, $src);
23628 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23629 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23630 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23631 %}
23632 ins_pipe( pipe_slow );
23633 %}
23634
23635
23636 // --------------------------------- Bitwise Ternary Logic ----------------------------------
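// vpternlogd combines three operands with an arbitrary boolean function: $func
// is an 8-bit truth table, and each result bit is func[(a << 2) | (b << 1) | c]
// for the corresponding bits a, b, c of dst, src2 and src3. For example,
// func = 0x96 computes a ^ b ^ c and func = 0xE8 the majority function.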
23637
23638 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23639 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23640 effect(TEMP dst);
23641 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23642 ins_encode %{
23643 int vector_len = vector_length_encoding(this);
23644 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23645 %}
23646 ins_pipe( pipe_slow );
23647 %}
23648
23649 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23650 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23651 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23652 effect(TEMP dst);
23653 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23654 ins_encode %{
23655 int vector_len = vector_length_encoding(this);
23656 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23657 %}
23658 ins_pipe( pipe_slow );
23659 %}
23660
23661 // --------------------------------- Rotation Operations ----------------------------------
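// Rotates every lane left or right; the rotate amount is either an 8-bit
// immediate or a per-lane count taken from a shift vector.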
23662 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23663 match(Set dst (RotateLeftV src shift));
23664 match(Set dst (RotateRightV src shift));
23665 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23666 ins_encode %{
23667 int opcode = this->ideal_Opcode();
23668 int vector_len = vector_length_encoding(this);
23669 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23670 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23671 %}
23672 ins_pipe( pipe_slow );
23673 %}
23674
23675 instruct vprorate(vec dst, vec src, vec shift) %{
23676 match(Set dst (RotateLeftV src shift));
23677 match(Set dst (RotateRightV src shift));
23678 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23679 ins_encode %{
23680 int opcode = this->ideal_Opcode();
23681 int vector_len = vector_length_encoding(this);
23682 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23683 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23684 %}
23685 ins_pipe( pipe_slow );
23686 %}
23687
23688 // ---------------------------------- Masked Operations ------------------------------------
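// Masked loads fill only the lanes whose mask bit is set and zero the rest;
// masked stores write back only the selected lanes and leave other memory
// untouched. AVX targets carry the mask in a vector register, EVEX targets in
// an opmask (k) register.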
23689 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23690 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23691 match(Set dst (LoadVectorMasked mem mask));
23692 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23693 ins_encode %{
23694 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23695 int vlen_enc = vector_length_encoding(this);
23696 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23697 %}
23698 ins_pipe( pipe_slow );
23699 %}
23700
23701
23702 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23703 predicate(n->in(3)->bottom_type()->isa_vectmask());
23704 match(Set dst (LoadVectorMasked mem mask));
23705 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23706 ins_encode %{
23707 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23708 int vector_len = vector_length_encoding(this);
23709 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23710 %}
23711 ins_pipe( pipe_slow );
23712 %}
23713
23714 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23715 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23716 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23717 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23718 ins_encode %{
23719 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23720 int vlen_enc = vector_length_encoding(src_node);
23721 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23722 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23723 %}
23724 ins_pipe( pipe_slow );
23725 %}
23726
23727 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23728 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23729 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23730 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23731 ins_encode %{
23732 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23733 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23734 int vlen_enc = vector_length_encoding(src_node);
23735 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23736 %}
23737 ins_pipe( pipe_slow );
23738 %}
23739
23740 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23741 match(Set addr (VerifyVectorAlignment addr mask));
23742 effect(KILL cr);
23743 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23744 ins_encode %{
23745 Label Lskip;
23746 // check if masked bits of addr are zero
23747 __ testq($addr$$Register, $mask$$constant);
23748 __ jccb(Assembler::equal, Lskip);
23749 __ stop("verify_vector_alignment found a misaligned vector memory access");
23750 __ bind(Lskip);
23751 %}
23752 ins_pipe(pipe_slow);
23753 %}
23754
23755 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23756 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23757 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23758 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23759 ins_encode %{
23760 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23761 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23762
23763 Label DONE;
23764 int vlen_enc = vector_length_encoding(this, $src1);
23765 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23766
23767 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23768 __ mov64($dst$$Register, -1L);
23769 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23770 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23771 __ jccb(Assembler::carrySet, DONE);
23772 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23773 __ notq($dst$$Register);
23774 __ tzcntq($dst$$Register, $dst$$Register);
23775 __ bind(DONE);
23776 %}
23777 ins_pipe( pipe_slow );
23778 %}
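// How vmask_cmp_node above works: ktmp1 gets the per-lane "equal" bits for the
// active lanes (masked-off lanes compare as 0), ktmp2 gets the inactive lanes
// (~mask). kortest sets CF only when ktmp1 | ktmp2 is all ones, i.e. every
// active lane matched, and dst then keeps the preloaded -1. Otherwise ~ktmp1
// flags the mismatches and tzcnt returns the index of the first mismatching lane.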
23779
23780
23781 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23782 match(Set dst (VectorMaskGen len));
23783 effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen $dst, $len \t! vector mask generator" %}
23785 ins_encode %{
23786 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23787 %}
23788 ins_pipe( pipe_slow );
23789 %}
23790
23791 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23792 match(Set dst (VectorMaskGen len));
23793 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23794 effect(TEMP temp);
23795 ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23797 __ kmovql($dst$$KRegister, $temp$$Register);
23798 %}
23799 ins_pipe( pipe_slow );
23800 %}
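// In vmask_gen_imm above, the immediate computes a mask with the low $len bits
// set, e.g. len = 3 yields 0xFFFFFFFFFFFFFFFF >> 61 = 0b111. This presumes
// 0 < len <= 64: len == 0 would shift a 64-bit value by 64 in the C++ that folds
// the constant, which is undefined behavior, so the matcher is assumed never to
// produce it.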
23801
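// The mask-query rules below (tolong / truecount / first-or-last-true) come in
// three shapes: an EVEX form taking a real kReg mask, a "bool" form where the
// mask is a byte-per-lane vector, and an AVX form that folds away an explicit
// VectorStoreMask on the input.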
23802 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23803 predicate(n->in(1)->bottom_type()->isa_vectmask());
23804 match(Set dst (VectorMaskToLong mask));
23805 effect(TEMP dst, KILL cr);
23806 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23807 ins_encode %{
23808 int opcode = this->ideal_Opcode();
23809 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23810 int mask_len = Matcher::vector_length(this, $mask);
23811 int mask_size = mask_len * type2aelembytes(mbt);
23812 int vlen_enc = vector_length_encoding(this, $mask);
23813 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23814 $dst$$Register, mask_len, mask_size, vlen_enc);
23815 %}
23816 ins_pipe( pipe_slow );
23817 %}
23818
23819 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23820 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23821 match(Set dst (VectorMaskToLong mask));
23822 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23823 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23824 ins_encode %{
23825 int opcode = this->ideal_Opcode();
23826 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23827 int mask_len = Matcher::vector_length(this, $mask);
23828 int vlen_enc = vector_length_encoding(this, $mask);
23829 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23830 $dst$$Register, mask_len, mbt, vlen_enc);
23831 %}
23832 ins_pipe( pipe_slow );
23833 %}
23834
23835 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23836 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23837 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23838 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23839 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23840 ins_encode %{
23841 int opcode = this->ideal_Opcode();
23842 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23843 int mask_len = Matcher::vector_length(this, $mask);
23844 int vlen_enc = vector_length_encoding(this, $mask);
23845 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23846 $dst$$Register, mask_len, mbt, vlen_enc);
23847 %}
23848 ins_pipe( pipe_slow );
23849 %}
23850
23851 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23852 predicate(n->in(1)->bottom_type()->isa_vectmask());
23853 match(Set dst (VectorMaskTrueCount mask));
23854 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23855 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
23856 ins_encode %{
23857 int opcode = this->ideal_Opcode();
23858 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23859 int mask_len = Matcher::vector_length(this, $mask);
23860 int mask_size = mask_len * type2aelembytes(mbt);
23861 int vlen_enc = vector_length_encoding(this, $mask);
23862 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23863 $tmp$$Register, mask_len, mask_size, vlen_enc);
23864 %}
23865 ins_pipe( pipe_slow );
23866 %}
23867
23868 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23869 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23870 match(Set dst (VectorMaskTrueCount mask));
23871 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23872 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23873 ins_encode %{
23874 int opcode = this->ideal_Opcode();
23875 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23876 int mask_len = Matcher::vector_length(this, $mask);
23877 int vlen_enc = vector_length_encoding(this, $mask);
23878 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23879 $tmp$$Register, mask_len, mbt, vlen_enc);
23880 %}
23881 ins_pipe( pipe_slow );
23882 %}
23883
23884 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23885 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23886 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
23887 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23888 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23889 ins_encode %{
23890 int opcode = this->ideal_Opcode();
23891 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23892 int mask_len = Matcher::vector_length(this, $mask);
23893 int vlen_enc = vector_length_encoding(this, $mask);
23894 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23895 $tmp$$Register, mask_len, mbt, vlen_enc);
23896 %}
23897 ins_pipe( pipe_slow );
23898 %}
23899
23900 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
23901 predicate(n->in(1)->bottom_type()->isa_vectmask());
23902 match(Set dst (VectorMaskFirstTrue mask));
23903 match(Set dst (VectorMaskLastTrue mask));
23904 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
23905 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
23906 ins_encode %{
23907 int opcode = this->ideal_Opcode();
23908 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23909 int mask_len = Matcher::vector_length(this, $mask);
23910 int mask_size = mask_len * type2aelembytes(mbt);
23911 int vlen_enc = vector_length_encoding(this, $mask);
23912 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23913 $tmp$$Register, mask_len, mask_size, vlen_enc);
23914 %}
23915 ins_pipe( pipe_slow );
23916 %}
23917
23918 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23919 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23920 match(Set dst (VectorMaskFirstTrue mask));
23921 match(Set dst (VectorMaskLastTrue mask));
23922 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23923 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23924 ins_encode %{
23925 int opcode = this->ideal_Opcode();
23926 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23927 int mask_len = Matcher::vector_length(this, $mask);
23928 int vlen_enc = vector_length_encoding(this, $mask);
23929 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23930 $tmp$$Register, mask_len, mbt, vlen_enc);
23931 %}
23932 ins_pipe( pipe_slow );
23933 %}
23934
23935 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
23936 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23937 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
23938 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
23939 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
23940 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
23941 ins_encode %{
23942 int opcode = this->ideal_Opcode();
23943 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23944 int mask_len = Matcher::vector_length(this, $mask);
23945 int vlen_enc = vector_length_encoding(this, $mask);
23946 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23947 $tmp$$Register, mask_len, mbt, vlen_enc);
23948 %}
23949 ins_pipe( pipe_slow );
23950 %}
23951
23952 // --------------------------------- Compress/Expand Operations ---------------------------
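// CompressV packs the mask-selected lanes of src toward the low end of dst;
// ExpandV performs the inverse scatter. With AVX512VL (or full 64-byte vectors)
// these map onto the native VPCOMPRESS/VPEXPAND forms; the AVX2 rule below
// emulates them by materializing a permutation from the mask, which is why it
// needs the extra perm/xtmp/rtmp/rscratch temporaries.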
23953 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
23954 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
23955 match(Set dst (CompressV src mask));
23956 match(Set dst (ExpandV src mask));
23957 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress_expand $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
23959 ins_encode %{
23960 int opcode = this->ideal_Opcode();
23961 int vlen_enc = vector_length_encoding(this);
23962 BasicType bt = Matcher::vector_element_basic_type(this);
23963 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
23964 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
23965 %}
23966 ins_pipe( pipe_slow );
23967 %}
23968
23969 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
23970 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
23971 match(Set dst (CompressV src mask));
23972 match(Set dst (ExpandV src mask));
23973 format %{ "vector_compress_expand $dst, $src, $mask" %}
23974 ins_encode %{
23975 int opcode = this->ideal_Opcode();
23976 int vector_len = vector_length_encoding(this);
23977 BasicType bt = Matcher::vector_element_basic_type(this);
23978 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
23979 %}
23980 ins_pipe( pipe_slow );
23981 %}
23982
23983 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
23984 match(Set dst (CompressM mask));
23985 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
23986 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
23987 ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "mask input must be a vector mask");
23989 int mask_len = Matcher::vector_length(this);
23990 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
23991 %}
23992 ins_pipe( pipe_slow );
23993 %}
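// CompressM above compresses the mask itself: the result has its low
// popcount(mask) bits set, matching the lane layout a CompressV with the same
// mask produces.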
23994
23995 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23996
23997 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
23998 predicate(!VM_Version::supports_gfni());
23999 match(Set dst (ReverseV src));
24000 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24002 ins_encode %{
24003 int vec_enc = vector_length_encoding(this);
24004 BasicType bt = Matcher::vector_element_basic_type(this);
24005 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24006 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24007 %}
24008 ins_pipe( pipe_slow );
24009 %}
24010
24011 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24012 predicate(VM_Version::supports_gfni());
24013 match(Set dst (ReverseV src));
24014 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24016 ins_encode %{
24017 int vec_enc = vector_length_encoding(this);
24018 BasicType bt = Matcher::vector_element_basic_type(this);
24019 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24020 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24021 $xtmp$$XMMRegister);
24022 %}
24023 ins_pipe( pipe_slow );
24024 %}
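// In vreverse_reg_gfni above, the 64-bit constant 0x8040201008040201 is the 8x8
// bit matrix (one byte per row) that makes GF2P8AFFINEQB reverse the bit order
// within each byte; any remaining byte-order fixup is handled inside
// vector_reverse_bit_gfni.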
24025
24026 instruct vreverse_byte_reg(vec dst, vec src) %{
24027 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24028 match(Set dst (ReverseBytesV src));
24029 effect(TEMP dst);
24030 format %{ "vector_reverse_byte $dst, $src" %}
24031 ins_encode %{
24032 int vec_enc = vector_length_encoding(this);
24033 BasicType bt = Matcher::vector_element_basic_type(this);
24034 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24035 %}
24036 ins_pipe( pipe_slow );
24037 %}
24038
24039 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24040 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24041 match(Set dst (ReverseBytesV src));
24042 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24044 ins_encode %{
24045 int vec_enc = vector_length_encoding(this);
24046 BasicType bt = Matcher::vector_element_basic_type(this);
24047 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24048 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24049 %}
24050 ins_pipe( pipe_slow );
24051 %}
24052
24053 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
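// The EVEX rules below build on VPLZCNTD/Q (AVX512CD); short/byte lanes and
// pre-AVX512 targets are emulated with the temp-register sequences in the macro
// assembler.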
24054
24055 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24056 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24057 Matcher::vector_length_in_bytes(n->in(1))));
24058 match(Set dst (CountLeadingZerosV src));
24059 format %{ "vector_count_leading_zeros $dst, $src" %}
24060 ins_encode %{
24061 int vlen_enc = vector_length_encoding(this, $src);
24062 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24063 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24064 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24065 %}
24066 ins_pipe( pipe_slow );
24067 %}
24068
24069 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24070 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24071 Matcher::vector_length_in_bytes(n->in(1))));
24072 match(Set dst (CountLeadingZerosV src mask));
24073 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24074 ins_encode %{
24075 int vlen_enc = vector_length_encoding(this, $src);
24076 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24077 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24078 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24079 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24080 %}
24081 ins_pipe( pipe_slow );
24082 %}
24083
24084 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24085 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24086 VM_Version::supports_avx512cd() &&
24087 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24088 match(Set dst (CountLeadingZerosV src));
24089 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24091 ins_encode %{
24092 int vlen_enc = vector_length_encoding(this, $src);
24093 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24094 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24095 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24096 %}
24097 ins_pipe( pipe_slow );
24098 %}
24099
24100 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24101 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24102 match(Set dst (CountLeadingZerosV src));
24103 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24105 ins_encode %{
24106 int vlen_enc = vector_length_encoding(this, $src);
24107 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24108 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24109 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24110 $rtmp$$Register, true, vlen_enc);
24111 %}
24112 ins_pipe( pipe_slow );
24113 %}
24114
24115 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24116 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24117 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24118 match(Set dst (CountLeadingZerosV src));
24119 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24120 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24121 ins_encode %{
24122 int vlen_enc = vector_length_encoding(this, $src);
24123 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24124 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24125 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24126 %}
24127 ins_pipe( pipe_slow );
24128 %}
24129
24130 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24131 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24132 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24133 match(Set dst (CountLeadingZerosV src));
24134 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24135 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24136 ins_encode %{
24137 int vlen_enc = vector_length_encoding(this, $src);
24138 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24139 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24140 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24141 %}
24142 ins_pipe( pipe_slow );
24143 %}
24144
24145 // ---------------------------------- Vector Masked Operations ------------------------------------
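// Unless noted otherwise, the rules below use EVEX merge-masking: the `true`
// passed to evmasked_op keeps the old dst contents in lanes whose mask bit is
// clear. The rearrange rule further down passes `false`, selecting zero-masking.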
24146
24147 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24148 match(Set dst (AddVB (Binary dst src2) mask));
24149 match(Set dst (AddVS (Binary dst src2) mask));
24150 match(Set dst (AddVI (Binary dst src2) mask));
24151 match(Set dst (AddVL (Binary dst src2) mask));
24152 match(Set dst (AddVF (Binary dst src2) mask));
24153 match(Set dst (AddVD (Binary dst src2) mask));
24154 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24155 ins_encode %{
24156 int vlen_enc = vector_length_encoding(this);
24157 BasicType bt = Matcher::vector_element_basic_type(this);
24158 int opc = this->ideal_Opcode();
24159 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24160 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24161 %}
24162 ins_pipe( pipe_slow );
24163 %}
24164
24165 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24166 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24167 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24168 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24169 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24170 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24171 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24172 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24173 ins_encode %{
24174 int vlen_enc = vector_length_encoding(this);
24175 BasicType bt = Matcher::vector_element_basic_type(this);
24176 int opc = this->ideal_Opcode();
24177 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24178 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24179 %}
24180 ins_pipe( pipe_slow );
24181 %}
24182
24183 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24184 match(Set dst (XorV (Binary dst src2) mask));
24185 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24186 ins_encode %{
24187 int vlen_enc = vector_length_encoding(this);
24188 BasicType bt = Matcher::vector_element_basic_type(this);
24189 int opc = this->ideal_Opcode();
24190 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24191 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24192 %}
24193 ins_pipe( pipe_slow );
24194 %}
24195
24196 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24197 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24198 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24199 ins_encode %{
24200 int vlen_enc = vector_length_encoding(this);
24201 BasicType bt = Matcher::vector_element_basic_type(this);
24202 int opc = this->ideal_Opcode();
24203 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24204 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24205 %}
24206 ins_pipe( pipe_slow );
24207 %}
24208
24209 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24210 match(Set dst (OrV (Binary dst src2) mask));
24211 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24212 ins_encode %{
24213 int vlen_enc = vector_length_encoding(this);
24214 BasicType bt = Matcher::vector_element_basic_type(this);
24215 int opc = this->ideal_Opcode();
24216 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24217 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24218 %}
24219 ins_pipe( pipe_slow );
24220 %}
24221
24222 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24223 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24224 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24225 ins_encode %{
24226 int vlen_enc = vector_length_encoding(this);
24227 BasicType bt = Matcher::vector_element_basic_type(this);
24228 int opc = this->ideal_Opcode();
24229 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24230 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24231 %}
24232 ins_pipe( pipe_slow );
24233 %}
24234
24235 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24236 match(Set dst (AndV (Binary dst src2) mask));
24237 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24238 ins_encode %{
24239 int vlen_enc = vector_length_encoding(this);
24240 BasicType bt = Matcher::vector_element_basic_type(this);
24241 int opc = this->ideal_Opcode();
24242 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24243 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24244 %}
24245 ins_pipe( pipe_slow );
24246 %}
24247
24248 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24249 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24250 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24251 ins_encode %{
24252 int vlen_enc = vector_length_encoding(this);
24253 BasicType bt = Matcher::vector_element_basic_type(this);
24254 int opc = this->ideal_Opcode();
24255 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24256 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24257 %}
24258 ins_pipe( pipe_slow );
24259 %}
24260
24261 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24262 match(Set dst (SubVB (Binary dst src2) mask));
24263 match(Set dst (SubVS (Binary dst src2) mask));
24264 match(Set dst (SubVI (Binary dst src2) mask));
24265 match(Set dst (SubVL (Binary dst src2) mask));
24266 match(Set dst (SubVF (Binary dst src2) mask));
24267 match(Set dst (SubVD (Binary dst src2) mask));
24268 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24269 ins_encode %{
24270 int vlen_enc = vector_length_encoding(this);
24271 BasicType bt = Matcher::vector_element_basic_type(this);
24272 int opc = this->ideal_Opcode();
24273 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24274 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24275 %}
24276 ins_pipe( pipe_slow );
24277 %}
24278
24279 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24280 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24281 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24282 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24283 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24284 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24285 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24286 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24287 ins_encode %{
24288 int vlen_enc = vector_length_encoding(this);
24289 BasicType bt = Matcher::vector_element_basic_type(this);
24290 int opc = this->ideal_Opcode();
24291 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24292 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24293 %}
24294 ins_pipe( pipe_slow );
24295 %}
24296
24297 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24298 match(Set dst (MulVS (Binary dst src2) mask));
24299 match(Set dst (MulVI (Binary dst src2) mask));
24300 match(Set dst (MulVL (Binary dst src2) mask));
24301 match(Set dst (MulVF (Binary dst src2) mask));
24302 match(Set dst (MulVD (Binary dst src2) mask));
24303 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24304 ins_encode %{
24305 int vlen_enc = vector_length_encoding(this);
24306 BasicType bt = Matcher::vector_element_basic_type(this);
24307 int opc = this->ideal_Opcode();
24308 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24309 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24310 %}
24311 ins_pipe( pipe_slow );
24312 %}
24313
24314 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24315 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24316 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24317 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24318 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24319 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24320 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24321 ins_encode %{
24322 int vlen_enc = vector_length_encoding(this);
24323 BasicType bt = Matcher::vector_element_basic_type(this);
24324 int opc = this->ideal_Opcode();
24325 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24326 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24327 %}
24328 ins_pipe( pipe_slow );
24329 %}
24330
24331 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24332 match(Set dst (SqrtVF dst mask));
24333 match(Set dst (SqrtVD dst mask));
24334 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24335 ins_encode %{
24336 int vlen_enc = vector_length_encoding(this);
24337 BasicType bt = Matcher::vector_element_basic_type(this);
24338 int opc = this->ideal_Opcode();
24339 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24340 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24341 %}
24342 ins_pipe( pipe_slow );
24343 %}
24344
24345 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24346 match(Set dst (DivVF (Binary dst src2) mask));
24347 match(Set dst (DivVD (Binary dst src2) mask));
24348 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24349 ins_encode %{
24350 int vlen_enc = vector_length_encoding(this);
24351 BasicType bt = Matcher::vector_element_basic_type(this);
24352 int opc = this->ideal_Opcode();
24353 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24354 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24355 %}
24356 ins_pipe( pipe_slow );
24357 %}
24358
24359 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24360 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24361 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24362 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24363 ins_encode %{
24364 int vlen_enc = vector_length_encoding(this);
24365 BasicType bt = Matcher::vector_element_basic_type(this);
24366 int opc = this->ideal_Opcode();
24367 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24368 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24369 %}
24370 ins_pipe( pipe_slow );
24371 %}
24372
24373
24374 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24375 match(Set dst (RotateLeftV (Binary dst shift) mask));
24376 match(Set dst (RotateRightV (Binary dst shift) mask));
24377 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24378 ins_encode %{
24379 int vlen_enc = vector_length_encoding(this);
24380 BasicType bt = Matcher::vector_element_basic_type(this);
24381 int opc = this->ideal_Opcode();
24382 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24383 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24384 %}
24385 ins_pipe( pipe_slow );
24386 %}
24387
24388 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24389 match(Set dst (RotateLeftV (Binary dst src2) mask));
24390 match(Set dst (RotateRightV (Binary dst src2) mask));
24391 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24392 ins_encode %{
24393 int vlen_enc = vector_length_encoding(this);
24394 BasicType bt = Matcher::vector_element_basic_type(this);
24395 int opc = this->ideal_Opcode();
24396 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24397 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24398 %}
24399 ins_pipe( pipe_slow );
24400 %}
24401
24402 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24403 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24404 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24405 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24406 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24407 ins_encode %{
24408 int vlen_enc = vector_length_encoding(this);
24409 BasicType bt = Matcher::vector_element_basic_type(this);
24410 int opc = this->ideal_Opcode();
24411 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24412 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24413 %}
24414 ins_pipe( pipe_slow );
24415 %}
24416
24417 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24418 predicate(!n->as_ShiftV()->is_var_shift());
24419 match(Set dst (LShiftVS (Binary dst src2) mask));
24420 match(Set dst (LShiftVI (Binary dst src2) mask));
24421 match(Set dst (LShiftVL (Binary dst src2) mask));
24422 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24423 ins_encode %{
24424 int vlen_enc = vector_length_encoding(this);
24425 BasicType bt = Matcher::vector_element_basic_type(this);
24426 int opc = this->ideal_Opcode();
24427 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24428 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24429 %}
24430 ins_pipe( pipe_slow );
24431 %}
24432
24433 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24434 predicate(n->as_ShiftV()->is_var_shift());
24435 match(Set dst (LShiftVS (Binary dst src2) mask));
24436 match(Set dst (LShiftVI (Binary dst src2) mask));
24437 match(Set dst (LShiftVL (Binary dst src2) mask));
24438 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24439 ins_encode %{
24440 int vlen_enc = vector_length_encoding(this);
24441 BasicType bt = Matcher::vector_element_basic_type(this);
24442 int opc = this->ideal_Opcode();
24443 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24444 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24445 %}
24446 ins_pipe( pipe_slow );
24447 %}
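// The two lshift rules above (and the rshift/urshift pairs below) are split on
// is_var_shift(): false means every lane is shifted by the same count, true
// means a per-lane variable shift. The trailing bool handed to evmasked_op
// selects the variable-shift instruction forms (VPSLLVW/D/Q and friends).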
24448
24449 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24450 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24451 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24452 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24453 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24454 ins_encode %{
24455 int vlen_enc = vector_length_encoding(this);
24456 BasicType bt = Matcher::vector_element_basic_type(this);
24457 int opc = this->ideal_Opcode();
24458 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24459 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24460 %}
24461 ins_pipe( pipe_slow );
24462 %}
24463
24464 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24465 predicate(!n->as_ShiftV()->is_var_shift());
24466 match(Set dst (RShiftVS (Binary dst src2) mask));
24467 match(Set dst (RShiftVI (Binary dst src2) mask));
24468 match(Set dst (RShiftVL (Binary dst src2) mask));
24469 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24470 ins_encode %{
24471 int vlen_enc = vector_length_encoding(this);
24472 BasicType bt = Matcher::vector_element_basic_type(this);
24473 int opc = this->ideal_Opcode();
24474 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24475 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24476 %}
24477 ins_pipe( pipe_slow );
24478 %}
24479
24480 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24481 predicate(n->as_ShiftV()->is_var_shift());
24482 match(Set dst (RShiftVS (Binary dst src2) mask));
24483 match(Set dst (RShiftVI (Binary dst src2) mask));
24484 match(Set dst (RShiftVL (Binary dst src2) mask));
24485 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24486 ins_encode %{
24487 int vlen_enc = vector_length_encoding(this);
24488 BasicType bt = Matcher::vector_element_basic_type(this);
24489 int opc = this->ideal_Opcode();
24490 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24491 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24492 %}
24493 ins_pipe( pipe_slow );
24494 %}
24495
24496 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24497 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24498 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24499 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24500 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24501 ins_encode %{
24502 int vlen_enc = vector_length_encoding(this);
24503 BasicType bt = Matcher::vector_element_basic_type(this);
24504 int opc = this->ideal_Opcode();
24505 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24506 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24507 %}
24508 ins_pipe( pipe_slow );
24509 %}
24510
24511 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24512 predicate(!n->as_ShiftV()->is_var_shift());
24513 match(Set dst (URShiftVS (Binary dst src2) mask));
24514 match(Set dst (URShiftVI (Binary dst src2) mask));
24515 match(Set dst (URShiftVL (Binary dst src2) mask));
24516 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24517 ins_encode %{
24518 int vlen_enc = vector_length_encoding(this);
24519 BasicType bt = Matcher::vector_element_basic_type(this);
24520 int opc = this->ideal_Opcode();
24521 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24522 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24523 %}
24524 ins_pipe( pipe_slow );
24525 %}
24526
24527 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24528 predicate(n->as_ShiftV()->is_var_shift());
24529 match(Set dst (URShiftVS (Binary dst src2) mask));
24530 match(Set dst (URShiftVI (Binary dst src2) mask));
24531 match(Set dst (URShiftVL (Binary dst src2) mask));
24532 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24533 ins_encode %{
24534 int vlen_enc = vector_length_encoding(this);
24535 BasicType bt = Matcher::vector_element_basic_type(this);
24536 int opc = this->ideal_Opcode();
24537 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24538 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24539 %}
24540 ins_pipe( pipe_slow );
24541 %}
24542
24543 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24544 match(Set dst (MaxV (Binary dst src2) mask));
24545 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24546 ins_encode %{
24547 int vlen_enc = vector_length_encoding(this);
24548 BasicType bt = Matcher::vector_element_basic_type(this);
24549 int opc = this->ideal_Opcode();
24550 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24551 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24552 %}
24553 ins_pipe( pipe_slow );
24554 %}
24555
24556 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24557 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24558 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24559 ins_encode %{
24560 int vlen_enc = vector_length_encoding(this);
24561 BasicType bt = Matcher::vector_element_basic_type(this);
24562 int opc = this->ideal_Opcode();
24563 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24564 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24565 %}
24566 ins_pipe( pipe_slow );
24567 %}
24568
24569 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24570 match(Set dst (MinV (Binary dst src2) mask));
24571 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24572 ins_encode %{
24573 int vlen_enc = vector_length_encoding(this);
24574 BasicType bt = Matcher::vector_element_basic_type(this);
24575 int opc = this->ideal_Opcode();
24576 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24577 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24578 %}
24579 ins_pipe( pipe_slow );
24580 %}
24581
24582 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24583 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24584 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24585 ins_encode %{
24586 int vlen_enc = vector_length_encoding(this);
24587 BasicType bt = Matcher::vector_element_basic_type(this);
24588 int opc = this->ideal_Opcode();
24589 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24590 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24591 %}
24592 ins_pipe( pipe_slow );
24593 %}
24594
24595 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24596 match(Set dst (VectorRearrange (Binary dst src2) mask));
24597 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24598 ins_encode %{
24599 int vlen_enc = vector_length_encoding(this);
24600 BasicType bt = Matcher::vector_element_basic_type(this);
24601 int opc = this->ideal_Opcode();
24602 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24603 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24604 %}
24605 ins_pipe( pipe_slow );
24606 %}
24607
24608 instruct vabs_masked(vec dst, kReg mask) %{
24609 match(Set dst (AbsVB dst mask));
24610 match(Set dst (AbsVS dst mask));
24611 match(Set dst (AbsVI dst mask));
24612 match(Set dst (AbsVL dst mask));
24613 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24614 ins_encode %{
24615 int vlen_enc = vector_length_encoding(this);
24616 BasicType bt = Matcher::vector_element_basic_type(this);
24617 int opc = this->ideal_Opcode();
24618 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24619 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24620 %}
24621 ins_pipe( pipe_slow );
24622 %}
24623
24624 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24625 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24626 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24627 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24628 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24630 int vlen_enc = vector_length_encoding(this);
24631 BasicType bt = Matcher::vector_element_basic_type(this);
24632 int opc = this->ideal_Opcode();
24633 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24634 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24635 %}
24636 ins_pipe( pipe_slow );
24637 %}
24638
24639 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24640 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24641 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24642 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24643 ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
24645 int vlen_enc = vector_length_encoding(this);
24646 BasicType bt = Matcher::vector_element_basic_type(this);
24647 int opc = this->ideal_Opcode();
24648 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24649 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24650 %}
24651 ins_pipe( pipe_slow );
24652 %}
24653
24654 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24655 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24656 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24657 ins_encode %{
24658 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24659 int vlen_enc = vector_length_encoding(this, $src1);
24660 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24661
    // Dispatch on the element type of src1 to select the comparison instruction.
24663 switch (src1_elem_bt) {
24664 case T_BYTE: {
24665 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24666 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24667 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24668 break;
24669 }
24670 case T_SHORT: {
24671 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24672 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24673 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24674 break;
24675 }
24676 case T_INT: {
24677 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24678 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24679 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24680 break;
24681 }
24682 case T_LONG: {
24683 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24684 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24685 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24686 break;
24687 }
24688 case T_FLOAT: {
24689 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24690 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24691 break;
24692 }
24693 case T_DOUBLE: {
24694 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24695 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24696 break;
24697 }
24698 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24699 }
24700 %}
24701 ins_pipe( pipe_slow );
24702 %}
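// In evcmp_masked above, booltest_pred_to_comparison_pred maps the C2 BoolTest
// constant onto an AVX-512 comparison predicate, and the `!is_unsigned` flag
// picks the signed encodings (VPCMPB/W/D/Q) over the unsigned ones
// (VPCMPUB/UW/UD/UQ). Floating-point compares carry no signedness and use the
// FP predicate mapping directly.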
24703
24704 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24705 predicate(Matcher::vector_length(n) <= 32);
24706 match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
24708 ins_encode %{
24709 int mask_len = Matcher::vector_length(this);
24710 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24711 %}
24712 ins_pipe( pipe_slow );
24713 %}
24714
24715 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24716 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24717 match(Set dst (XorVMask src (MaskAll cnt)));
24718 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24720 ins_encode %{
24721 uint masklen = Matcher::vector_length(this);
24722 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24723 %}
24724 ins_pipe( pipe_slow );
24725 %}
24726
24727 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24728 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24729 (Matcher::vector_length(n) == 16) ||
24730 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24731 match(Set dst (XorVMask src (MaskAll cnt)));
24732 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24733 ins_encode %{
24734 uint masklen = Matcher::vector_length(this);
24735 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24736 %}
24737 ins_pipe( pipe_slow );
24738 %}
24739
24740 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24741 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24742 match(Set dst (VectorLongToMask src));
24743 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24744 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24745 ins_encode %{
24746 int mask_len = Matcher::vector_length(this);
24747 int vec_enc = vector_length_encoding(mask_len);
24748 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24749 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24750 %}
24751 ins_pipe( pipe_slow );
24752 %}
24753
24754
24755 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24756 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24757 match(Set dst (VectorLongToMask src));
24758 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
24760 ins_encode %{
24761 int mask_len = Matcher::vector_length(this);
24762 assert(mask_len <= 32, "invalid mask length");
24763 int vec_enc = vector_length_encoding(mask_len);
24764 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24765 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24766 %}
24767 ins_pipe( pipe_slow );
24768 %}
24769
24770 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24771 predicate(n->bottom_type()->isa_vectmask());
24772 match(Set dst (VectorLongToMask src));
24773 format %{ "long_to_mask_evex $dst, $src\t!" %}
24774 ins_encode %{
24775 __ kmov($dst$$KRegister, $src$$Register);
24776 %}
24777 ins_pipe( pipe_slow );
24778 %}
24779
24780 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24781 match(Set dst (AndVMask src1 src2));
24782 match(Set dst (OrVMask src1 src2));
24783 match(Set dst (XorVMask src1 src2));
24784 effect(TEMP kscratch);
24785 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24786 ins_encode %{
24787 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24788 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24789 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24790 uint masklen = Matcher::vector_length(this);
24791 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24792 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24793 %}
24794 ins_pipe( pipe_slow );
24795 %}
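// In mask_opers_evex above, byte-granular mask instructions (KANDB and friends)
// require AVX512DQ, so without it mask lengths below 16 are widened to 16 and
// the word forms (KANDW/KORW/KXORW) are used instead; the surplus high bits are
// don't-cares.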
24796
24797 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24798 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24799 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24800 ins_encode %{
24801 int vlen_enc = vector_length_encoding(this);
24802 BasicType bt = Matcher::vector_element_basic_type(this);
24803 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24804 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24805 %}
24806 ins_pipe( pipe_slow );
24807 %}
24808
24809 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24810 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24811 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24812 ins_encode %{
24813 int vlen_enc = vector_length_encoding(this);
24814 BasicType bt = Matcher::vector_element_basic_type(this);
24815 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24816 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24817 %}
24818 ins_pipe( pipe_slow );
24819 %}
24820
24821 instruct castMM(kReg dst)
24822 %{
24823 match(Set dst (CastVV dst));
24824
24825 size(0);
24826 format %{ "# castVV of $dst" %}
24827 ins_encode(/* empty encoding */);
24828 ins_cost(0);
24829 ins_pipe(empty);
24830 %}
24831
24832 instruct castVV(vec dst)
24833 %{
24834 match(Set dst (CastVV dst));
24835
24836 size(0);
24837 format %{ "# castVV of $dst" %}
24838 ins_encode(/* empty encoding */);
24839 ins_cost(0);
24840 ins_pipe(empty);
24841 %}
24842
24843 instruct castVVLeg(legVec dst)
24844 %{
24845 match(Set dst (CastVV dst));
24846
24847 size(0);
24848 format %{ "# castVV of $dst" %}
24849 ins_encode(/* empty encoding */);
24850 ins_cost(0);
24851 ins_pipe(empty);
24852 %}
24853
24854 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24855 %{
24856 match(Set dst (IsInfiniteF src));
24857 effect(TEMP ktmp, KILL cr);
24858 format %{ "float_class_check $dst, $src" %}
24859 ins_encode %{
24860 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24861 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24862 %}
24863 ins_pipe(pipe_slow);
24864 %}
24865
24866 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24867 %{
24868 match(Set dst (IsInfiniteD src));
24869 effect(TEMP ktmp, KILL cr);
24870 format %{ "double_class_check $dst, $src" %}
24871 ins_encode %{
24872 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24873 __ kmovbl($dst$$Register, $ktmp$$KRegister);
24874 %}
24875 ins_pipe(pipe_slow);
24876 %}
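// In the two class-check rules above, imm8 0x18 selects the +Inf (bit 3) and
// -Inf (bit 4) classes for VFPCLASSSS/VFPCLASSSD, so the mask bit is 1 exactly
// when src is infinite; kmovbl then materializes that bit as the integer result.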
24877
24878 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24879 %{
24880 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24881 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24882 match(Set dst (SaturatingAddV src1 src2));
24883 match(Set dst (SaturatingSubV src1 src2));
24884 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24885 ins_encode %{
24886 int vlen_enc = vector_length_encoding(this);
24887 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24888 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24889 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24890 %}
24891 ins_pipe(pipe_slow);
24892 %}
24893
24894 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24895 %{
24896 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24897 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24898 match(Set dst (SaturatingAddV src1 src2));
24899 match(Set dst (SaturatingSubV src1 src2));
24900 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24901 ins_encode %{
24902 int vlen_enc = vector_length_encoding(this);
24903 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24904 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24905 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24906 %}
24907 ins_pipe(pipe_slow);
24908 %}
24909
24910 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24911 %{
24912 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24913 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24914 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24915 match(Set dst (SaturatingAddV src1 src2));
24916 match(Set dst (SaturatingSubV src1 src2));
24917 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24918 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24919 ins_encode %{
24920 int vlen_enc = vector_length_encoding(this);
24921 BasicType elem_bt = Matcher::vector_element_basic_type(this);
24922 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24923 $src1$$XMMRegister, $src2$$XMMRegister,
24924 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24925 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24926 %}
24927 ins_pipe(pipe_slow);
24928 %}
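// int/long lanes have no native saturating add/sub. As a sketch of the idea
// (not the exact instruction sequence): the expansion computes the wrapping
// result, derives signed-overflow masks from the operand/result sign bits into
// ktmp1/ktmp2, and patches the overflowed lanes with MIN/MAX via masked moves,
// using xtmp1/xtmp2 as scratch.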
24929
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

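// Memory-operand variants fold the load of the second vector operand directly
// into the saturating instruction.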
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

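// Masked (predicated) variants: lanes that are clear in $mask keep the value
// already present in $dst (merge masking).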
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

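// Per-lane two-table permute: each index selects an element from the
// concatenation of src1 and src2.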
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

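// Half-precision (Float16) scalar and vector operations.
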
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2, scalar fp16 fma" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

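// The memory variants match through a VectorReinterpret because Float16
// vectors are loaded as short vectors; the load is folded into the operation.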
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
//
// peeppredicate ( rule_predicate );
// // the predicate; if it evaluates to false, the peephole rule is ignored
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization; the procedure should
// // reside in the architecture-dependent peephole file and has a signature of
// // the form bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...).
// // The arguments are the basic block, the current node index inside the
// // block, the register allocator, a function that, when invoked, returns a
// // new instance of the node defined in peepreplace, and the rule numbers of
// // the nodes appearing in the corresponding peepmatch. The procedure returns
// // true if the transformation succeeded, else false.
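// // For example, a procedure might be declared as follows (a sketch; the
// // name and the trailing rule arguments are hypothetical):
// //   bool my_peephole_proc(Block* block, int index, PhaseRegAlloc* ra,
// //                         MachNode* (*new_root)(), int rule0, int rule1);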
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based, using left-to-right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only transformations inside a basic block (do we need more for peephole?)
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// instruct leaI_rReg_immI(rRegI dst, immI_1 src)
// %{
//   match(Set dst (AddI dst src));
// %}
//
// 1. Simple replacement
// - Only match adjacent instructions in the same basic block
// - Only equality constraints
// - Only constraints between operands, not (0.dest_reg == RAX_enc)
// - Only one replacement instruction
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // increment preceded by register-register move
//   peepmatch ( incI_rReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// 2. Procedural replacement
// - More flexible in finding relevant nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing capabilities
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of these nodes inside are passed into the function below
//   peepmatch ( incI_rReg movI );
//   // the method that takes the responsibility of transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that, when invoked,
//   // creates this node is passed into the function above
//   peepreplace ( leaI_rReg_immI() );
// %}

// These instructions are not matched by the matcher but are used by the peephole rules below
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
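    // rbp and r13 cannot be encoded as the base of an address without a
    // displacement byte, so prefer the other register as the base.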
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
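    // A scaled index with no base requires a 32-bit displacement, so encode
    // src << 1 as [src + src] whenever src can legally serve as the base.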
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
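    // Same base-register restriction as in leaI_rReg_rReg_peep above.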
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
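    // Same scaled-index consideration as in leaI_rReg_immI2_peep above.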
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

// These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
// sal}) with lea instructions. The {add, sal} rules are beneficial in
// processors with at least partial ALU support for lea
// (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
// beneficial for processors with full ALU support
// (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.

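// For example, the first rule below coalesces (sketch)
//   movl dst, src1
//   addl dst, src2
// into
//   leal dst, [src1 + src2]
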
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}

peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}

// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.

// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.