1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
69
// Each 64-bit general register is modeled as a pair of 32-bit VMReg
// slots: the unsuffixed name is the first slot and the _H name is the
// adjacent second slot (defined via as_VMReg()->next()).

// RAX, RCX, RDX: save-on-call scratch in both conventions.
reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

// RBX: save-on-call for compiled Java code, save-on-entry for the C
// convention (see the SOE discussion in the header comment above).
reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

// RSP is the stack pointer: no-save in both conventions, so the
// allocator never hands it out as a general register.
reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

// RSI/RDI differ per platform C ABI: callee-saved (SOE) in the
// Windows x64 calling convention, caller-saved (SOC) in System V.
#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif
106
// R8-R11: additional save-on-call scratch registers (REX-encoded,
// per the note above).
reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

// R12-R15: save-on-entry for the C convention, save-on-call for
// compiled Java code.
reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

// R16-R31: all save-on-call in both conventions.
// NOTE(review): presumably the extended GPRs (EGPRs) added by Intel
// APX -- confirm against the register declarations elsewhere.
reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
// Allocation order for the GPR chunk, highest priority first (see
// the priority discussion above).  The pure SOC temporaries R10/R11
// come first; registers bound up in fixed calling sequences (RAX,
// RBP, the SOE block R13-R15) come late; RSP is last since it is
// defined no-save and never allocated.  Pairs stay on even
// boundaries as required above.
alloc_class chunk0(R10, R10_H,
R11, R11_H,
R8, R8_H,
R9, R9_H,
R12, R12_H,
RCX, RCX_H,
RBX, RBX_H,
RDI, RDI_H,
RDX, RDX_H,
RSI, RSI_H,
RAX, RAX_H,
RBP, RBP_H,
R13, R13_H,
R14, R14_H,
R15, R15_H,
R16, R16_H,
R17, R17_H,
R18, R18_H,
R19, R19_H,
R20, R20_H,
R21, R21_H,
R22, R22_H,
R23, R23_H,
R24, R24_H,
R25, R25_H,
R26, R26_H,
R27, R27_H,
R28, R28_H,
R29, R29_H,
R30, R30_H,
R31, R31_H,
RSP, RSP_H);
221
// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
// Each vector register is modeled as 16 consecutive 32-bit VMReg
// slots: the unsuffixed name is slot (a) and suffixes b..p map to
// as_VMReg()->next(1)..next(15).  Every slot is save-on-call (SOC)
// in both conventions.

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

// XMM6-XMM15 are preserved across calls by the Windows ABI (see the
// note above) but are still modeled SOC for both conventions here.
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

// XMM8-XMM15 need REX/VEX/EVEX prefixes to encode (see note above).
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

// XMM16-XMM31 are encodable only with EVEX (AVX-512 capable
// hardware), per the encoding note above.
reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
782
783 // AVX3 Mask Registers.
784 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
785 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
786
787 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
788 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
789
790 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
791 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
792
793 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
794 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
795
796 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
797 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
798
799 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
800 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
801
802 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
803 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
// Each 64-bit GPR is described as two 32-bit halves (Rn, Rn_H).
// Note: this class includes RSP and R15, unlike all_int_reg below.
reg_class all_reg(RAX, RAX_H,
RDX, RDX_H,
RBP, RBP_H,
RDI, RDI_H,
RSI, RSI_H,
RCX, RCX_H,
RBX, RBX_H,
RSP, RSP_H,
R8, R8_H,
R9, R9_H,
R10, R10_H,
R11, R11_H,
R12, R12_H,
R13, R13_H,
R14, R14_H,
R15, R15_H,
R16, R16_H,
R17, R17_H,
R18, R18_H,
R19, R19_H,
R20, R20_H,
R21, R21_H,
R22, R22_H,
R23, R23_H,
R24, R24_H,
R25, R25_H,
R26, R26_H,
R27, R27_H,
R28, R28_H,
R29, R29_H,
R30, R30_H,
R31, R31_H);
849
// Class for all int registers including APX extended GPRs.
// Unlike all_reg above, RSP and R15 are deliberately omitted (R15 is the
// TLS pointer, see ptr_r15_reg below).
// Fix: added the missing comma after RAX so the member list is consistently
// comma-separated like every other reg_class in this file.
reg_class all_int_reg(RAX,
RDX,
RBP,
RDI,
RSI,
RCX,
RBX,
R8,
R9,
R10,
R11,
R12,
R13,
R14,
R16,
R17,
R18,
R19,
R20,
R21,
R22,
R23,
R24,
R25,
R26,
R27,
R28,
R29,
R30,
R31);
881
// The reg_class definitions below use %{ ... %} bodies that return a
// precomputed register mask (the _*_mask variables are defined elsewhere in
// this file's source sections — presumably built during matcher init; the
// definitions are not visible in this chunk).

// Class for all pointer registers
reg_class any_reg %{
return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
return _INT_NO_RBP_R13_REG_mask;
%}
946
// Singleton classes: used by instructions that hard-wire a specific
// register (e.g. shift counts in RCX, division results in RAX:RDX).

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);
997
// alloc_class chunk1: all XMM slices in allocation order.  The listed order
// defines the allocator's chunk layout, so do not reorder these entries.
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

// Mask registers: allocation order (K7 first) intentionally differs from the
// vectmask_reg class order below.
alloc_class chunk2(K7, K7_H,
K6, K6_H,
K5, K5_H,
K4, K4_H,
K3, K3_H,
K2, K2_H,
K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
K2, K2_H,
K3, K3_H,
K4, K4_H,
K5, K5_H,
K6, K6_H,
K7, K7_H);

// Singleton mask-register classes for instructions that pin a specific kN.
reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
1060
// Scalar float (32-bit, one slice per register).  The legacy class stops at
// XMM15; the evex class extends to XMM31.  reg_class_dynamic picks between
// them at runtime based on the CPU-feature predicate.

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15);

// Class for evex float registers
reg_class float_reg_evex(XMM0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15,
XMM16,
XMM17,
XMM18,
XMM19,
XMM20,
XMM21,
XMM22,
XMM23,
XMM24,
XMM25,
XMM26,
XMM27,
XMM28,
XMM29,
XMM30,
XMM31);

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Scalar double (64-bit, two 32-bit slices per register: base + b).

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
XMM3, XMM3b,
XMM4, XMM4b,
XMM5, XMM5b,
XMM6, XMM6b,
XMM7, XMM7b,
XMM8, XMM8b,
XMM9, XMM9b,
XMM10, XMM10b,
XMM11, XMM11b,
XMM12, XMM12b,
XMM13, XMM13b,
XMM14, XMM14b,
XMM15, XMM15b);

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
XMM3, XMM3b,
XMM4, XMM4b,
XMM5, XMM5b,
XMM6, XMM6b,
XMM7, XMM7b,
XMM8, XMM8b,
XMM9, XMM9b,
XMM10, XMM10b,
XMM11, XMM11b,
XMM12, XMM12b,
XMM13, XMM13b,
XMM14, XMM14b,
XMM15, XMM15b,
XMM16, XMM16b,
XMM17, XMM17b,
XMM18, XMM18b,
XMM19, XMM19b,
XMM20, XMM20b,
XMM21, XMM21b,
XMM22, XMM22b,
XMM23, XMM23b,
XMM24, XMM24b,
XMM25, XMM25b,
XMM26, XMM26b,
XMM27, XMM27b,
XMM28, XMM28b,
XMM29, XMM29b,
XMM30, XMM30b,
XMM31, XMM31b);

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// 32-bit vectors (one slice per register).

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15);

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
XMM1,
XMM2,
XMM3,
XMM4,
XMM5,
XMM6,
XMM7,
XMM8,
XMM9,
XMM10,
XMM11,
XMM12,
XMM13,
XMM14,
XMM15,
XMM16,
XMM17,
XMM18,
XMM19,
XMM20,
XMM21,
XMM22,
XMM23,
XMM24,
XMM25,
XMM26,
XMM27,
XMM28,
XMM29,
XMM30,
XMM31);

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// 64-bit vectors (two slices per register: base + b).

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
XMM3, XMM3b,
XMM4, XMM4b,
XMM5, XMM5b,
XMM6, XMM6b,
XMM7, XMM7b,
XMM8, XMM8b,
XMM9, XMM9b,
XMM10, XMM10b,
XMM11, XMM11b,
XMM12, XMM12b,
XMM13, XMM13b,
XMM14, XMM14b,
XMM15, XMM15b);

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
XMM1, XMM1b,
XMM2, XMM2b,
XMM3, XMM3b,
XMM4, XMM4b,
XMM5, XMM5b,
XMM6, XMM6b,
XMM7, XMM7b,
XMM8, XMM8b,
XMM9, XMM9b,
XMM10, XMM10b,
XMM11, XMM11b,
XMM12, XMM12b,
XMM13, XMM13b,
XMM14, XMM14b,
XMM15, XMM15b,
XMM16, XMM16b,
XMM17, XMM17b,
XMM18, XMM18b,
XMM19, XMM19b,
XMM20, XMM20b,
XMM21, XMM21b,
XMM22, XMM22b,
XMM23, XMM23b,
XMM24, XMM24b,
XMM25, XMM25b,
XMM26, XMM26b,
XMM27, XMM27b,
XMM28, XMM28b,
XMM29, XMM29b,
XMM30, XMM30b,
XMM31, XMM31b);

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// 128-bit vectors (four slices per register: base + b..d).

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
XMM1, XMM1b, XMM1c, XMM1d,
XMM2, XMM2b, XMM2c, XMM2d,
XMM3, XMM3b, XMM3c, XMM3d,
XMM4, XMM4b, XMM4c, XMM4d,
XMM5, XMM5b, XMM5c, XMM5d,
XMM6, XMM6b, XMM6c, XMM6d,
XMM7, XMM7b, XMM7c, XMM7d,
XMM8, XMM8b, XMM8c, XMM8d,
XMM9, XMM9b, XMM9c, XMM9d,
XMM10, XMM10b, XMM10c, XMM10d,
XMM11, XMM11b, XMM11c, XMM11d,
XMM12, XMM12b, XMM12c, XMM12d,
XMM13, XMM13b, XMM13c, XMM13d,
XMM14, XMM14b, XMM14c, XMM14d,
XMM15, XMM15b, XMM15c, XMM15d);

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
XMM1, XMM1b, XMM1c, XMM1d,
XMM2, XMM2b, XMM2c, XMM2d,
XMM3, XMM3b, XMM3c, XMM3d,
XMM4, XMM4b, XMM4c, XMM4d,
XMM5, XMM5b, XMM5c, XMM5d,
XMM6, XMM6b, XMM6c, XMM6d,
XMM7, XMM7b, XMM7c, XMM7d,
XMM8, XMM8b, XMM8c, XMM8d,
XMM9, XMM9b, XMM9c, XMM9d,
XMM10, XMM10b, XMM10c, XMM10d,
XMM11, XMM11b, XMM11c, XMM11d,
XMM12, XMM12b, XMM12c, XMM12d,
XMM13, XMM13b, XMM13c, XMM13d,
XMM14, XMM14b, XMM14c, XMM14d,
XMM15, XMM15b, XMM15c, XMM15d,
XMM16, XMM16b, XMM16c, XMM16d,
XMM17, XMM17b, XMM17c, XMM17d,
XMM18, XMM18b, XMM18c, XMM18d,
XMM19, XMM19b, XMM19c, XMM19d,
XMM20, XMM20b, XMM20c, XMM20d,
XMM21, XMM21b, XMM21c, XMM21d,
XMM22, XMM22b, XMM22c, XMM22d,
XMM23, XMM23b, XMM23c, XMM23d,
XMM24, XMM24b, XMM24c, XMM24d,
XMM25, XMM25b, XMM25c, XMM25d,
XMM26, XMM26b, XMM26c, XMM26d,
XMM27, XMM27b, XMM27c, XMM27d,
XMM28, XMM28b, XMM28c, XMM28d,
XMM29, XMM29b, XMM29c, XMM29d,
XMM30, XMM30b, XMM30c, XMM30d,
XMM31, XMM31b, XMM31c, XMM31d);

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
1336 // Class for all 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for all 256bit vector registers
// (EVEX encoding: full XMM0-XMM31 set; each register spans eight 32-bit slots a-h)
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);

// Runtime selection between the EVEX and legacy 256-bit classes (see above).
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for all 512bit vector registers
// (EVEX encoding: full XMM0-XMM31 set; each register spans sixteen 32-bit slots a-p)
reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

// Class for restricted 512bit vector registers
// (legacy fallback: only XMM0-XMM15 are available)
reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
                             XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);

// Runtime selection between the EVEX and legacy 512-bit classes (see above).
reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
// Register masks computed once at startup by reg_mask_init() (see the
// matching definitions in the source block below). The _NO_XXX variants
// exclude registers that particular instructions implicitly clobber or use
// (e.g. RAX/RDX for div, RCX for shifts).
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// Variants that also include stack slots, for operands that may live on the
// stack as well as in a register.
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Accessors used by the matcher for the stack-or-register classes.
inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
1498
1499 %}
1500
1501 source %{
// Relocation kinds for 64-bit immediates and 32-bit displacements that refer
// to relocatable addresses.
#define RELOC_IMM64 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Shorthand used throughout the emit code below.
#define __ masm->

// Definitions of the masks declared in the header block above; populated by
// reg_mask_init() at startup.
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;

// True when r12 is reserved to hold the compressed-oops heap base and must
// be kept out of the allocatable masks.
static bool need_r12_heapbase() {
  return UseCompressedOops;
}
1528
// One-time (VM startup) initialization of the register masks defined above.
// Masks are derived top-down: start from the adlc-generated _ALL_REG_mask and
// carve out registers with a dedicated purpose (rsp, r15 thread register,
// r12 heap base, rbp frame pointer) or that the current CPU/flag settings do
// not make available (APX extended GPRs r16-r31 when !UseAPX).
void reg_mask_init() {
  // APX extended general-purpose registers; removed from the masks below
  // unless UseAPX is enabled.
  constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};

  // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  // We derive a number of subsets from it.
  _ANY_REG_mask.assignFrom(_ALL_REG_mask);

  if (PreserveFramePointer) {
    // rbp is dedicated to the frame pointer; remove both halves of the pair.
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  }
  if (need_r12_heapbase()) {
    // r12 holds the compressed-oops heap base.
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
    _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  }

  // Pointer registers: _ANY_REG_mask minus rsp (stack pointer) and r15
  // (current thread), and minus the APX registers when they are unavailable.
  _PTR_REG_mask.assignFrom(_ANY_REG_mask);
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
  if (!UseAPX) {
    for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
      _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
      _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
    }
  }

  _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
  _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

  _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
  _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));

  _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
  _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));

  _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
  _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));


  // Long registers use the same set as pointers.
  _LONG_REG_mask.assignFrom(_PTR_REG_mask);
  _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
  _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

  _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));

  _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
  _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));

  _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));

  // Int registers derive from the adlc-generated _ALL_INT_REG_mask; only the
  // first (32-bit) half of each pair needs removing below.
  _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
  if (!UseAPX) {
    for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
      _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
    }
  }

  if (PreserveFramePointer) {
    _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  }
  if (need_r12_heapbase()) {
    _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  }

  _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
  _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());

  _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
  _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));

  _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
  _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));

  _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
  _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));

  // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  // from the float_reg_legacy/float_reg_evex register class.
  _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
}
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
// Size in bytes of the AVX-clearing code emitted before calls/returns:
// 3 bytes for the vzeroupper instruction when one is generated, else 0.
static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
}
1633
// !!!!! Special hack to get all types of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  // 5 bytes: direct call with 32-bit relative displacement.
  int offset = 5; // 5 bytes from start of call to where return address points
  offset += clear_avx_size();  // account for an optional leading vzeroupper
  return offset;
}

int MachCallDynamicJavaNode::ret_addr_offset()
{
  // NOTE(review): presumably 10-byte movq of the inline-cache word plus a
  // 5-byte call — confirm against the corresponding encoding block.
  int offset = 15; // 15 bytes from start of call to where return address points
  offset += clear_avx_size();  // account for an optional leading vzeroupper
  return offset;
}

int MachCallRuntimeNode::ret_addr_offset() {
  int offset = 13; // movq r10,#addr; callq (r10)
  // CallLeafVector does not emit vzeroupper, so no extra bytes in that case.
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  // Pad so the 4-byte displacement that follows the opcode is aligned.
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 11; // skip movq instruction + call opcode byte
  // Pad so the 4-byte displacement that follows the opcode is aligned.
  return align_up(current_offset, alignment_required()) - current_offset;
}
1679
// This could be in MacroAssembler but it's fairly C2 specific
// Adjusts EFLAGS after a comiss/ucomiss-style compare so that an unordered
// (NaN) result reads as 'less than' to subsequent conditional branches.
static void emit_cmpfp_fixup(MacroAssembler* masm) {
  Label exit;
  // PF=0 means the compare was ordered; nothing to fix.
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  //  |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //  0 0 1 0 1 0 1 1   (0x2B)
  //
  // Mask the saved flags word on the stack, then reload it.
  __ andq(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}
1700
// Materializes a three-way FP compare result in 'dst': -1 for below (or
// unordered, since CF=1 covers NaN), 0 for equal, 1 for above.
static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // If any floating point comparison instruction is used, unordered case always triggers jump
  // for below condition, CF=1 is true when at least one input is NaN
  Label done;
  __ movl(dst, -1);                     // assume 'below' (also the NaN result)
  __ jcc(Assembler::below, done);
  __ setcc(Assembler::notEqual, dst);   // 0 if equal, 1 if above
  __ bind(done);
}
1710
// Math.min()    # Math.max()
//  --------------------------
//  ucomis[s/d]  #
//  ja   -> b    # a
//  jp   -> NaN  # NaN
//  jb   -> a    # b
//  je           #
//  |-jz -> a | b  # a & b
//  |    -> a    #
//
// Emits the IEEE-correct scalar min/max sequence sketched above: 'a' and 'b'
// are compared with ucomiss/ucomisd and the flags select the result, with
// dedicated paths for NaN (canonical NaN returned) and signed zeros (the
// +0/-0 case is resolved with a bitwise or/and of the operands).
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
                            XMMRegister a, XMMRegister b,
                            XMMRegister xmmt, Register rt,
                            bool min, bool single) {

  Label nan, zero, below, above, done;

  if (single)
    __ ucomiss(a, b);
  else
    __ ucomisd(a, b);

  // When dst already is the register the 'above' path would copy from,
  // that copy is a no-op, so branch straight to done.
  if (dst->encoding() != (min ? b : a)->encoding())
    __ jccb(Assembler::above, above); // CF=0 & ZF=0
  else
    __ jccb(Assembler::above, done);

  __ jccb(Assembler::parity, nan);  // PF=1
  __ jccb(Assembler::below, below); // CF=1

  // equal
  // a == b numerically; compare 'a' against +0.0 to detect the +0/-0 case,
  // which compares equal but must be disambiguated by sign.
  __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  if (single) {
    __ ucomiss(a, xmmt);
    __ jccb(Assembler::equal, zero);

    __ movflt(dst, a);
    __ jmp(done);
  }
  else {
    __ ucomisd(a, xmmt);
    __ jccb(Assembler::equal, zero);

    __ movdbl(dst, a);
    __ jmp(done);
  }

  __ bind(zero);
  // Signed zeros: or yields -0 if either is -0 (min); and yields +0 unless
  // both are -0 (max).
  if (min)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  else
    __ vpand(dst, a, b, Assembler::AVX_128bit);

  __ jmp(done);

  __ bind(above);
  if (single)
    __ movflt(dst, min ? b : a);
  else
    __ movdbl(dst, min ? b : a);

  __ jmp(done);

  __ bind(nan);
  // Return the canonical quiet NaN, built in the scratch GPR 'rt'.
  if (single) {
    __ movl(rt, 0x7fc00000); // Float.NaN
    __ movdl(dst, rt);
  }
  else {
    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
    __ movdq(dst, rt);
  }
  __ jmp(done);

  __ bind(below);
  if (single)
    __ movflt(dst, min ? a : b);
  else
    __ movdbl(dst, min ? a : b);

  __ bind(done);
}
1792
1793 //=============================================================================
1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1795
1796 int ConstantTable::calculate_table_base_offset() const {
1797 return 0; // absolute addressing, no offset
1798 }
1799
1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1802 ShouldNotReachHere();
1803 }
1804
1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1806 // Empty encoding
1807 }
1808
1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1810 return 0;
1811 }
1812
1813 #ifndef PRODUCT
1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1815 st->print("# MachConstantBaseNode (empty encoding)");
1816 }
1817 #endif
1818
1819
1820 //=============================================================================
1821 #ifndef PRODUCT
1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1823 Compile* C = ra_->C;
1824
1825 int framesize = C->output()->frame_size_in_bytes();
1826 int bangsize = C->output()->bang_size_in_bytes();
1827 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1828 // Remove wordSize for return addr which is already pushed.
1829 framesize -= wordSize;
1830
1831 if (C->output()->need_stack_bang(bangsize)) {
1832 framesize -= wordSize;
1833 st->print("# stack bang (%d bytes)", bangsize);
1834 st->print("\n\t");
1835 st->print("pushq rbp\t# Save rbp");
1836 if (PreserveFramePointer) {
1837 st->print("\n\t");
1838 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1839 }
1840 if (framesize) {
1841 st->print("\n\t");
1842 st->print("subq rsp, #%d\t# Create frame",framesize);
1843 }
1844 } else {
1845 st->print("subq rsp, #%d\t# Create frame",framesize);
1846 st->print("\n\t");
1847 framesize -= wordSize;
1848 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1849 if (PreserveFramePointer) {
1850 st->print("\n\t");
1851 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1852 if (framesize > 0) {
1853 st->print("\n\t");
1854 st->print("addq rbp, #%d", framesize);
1855 }
1856 }
1857 }
1858
1859 if (VerifyStackAtCalls) {
1860 st->print("\n\t");
1861 framesize -= wordSize;
1862 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1863 #ifdef ASSERT
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
// Emits the method prologue: optional class-initialization barrier, the
// verified entry (stack bang + frame setup), and constant-table base fixup.
void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    // Guard against entering code of a class that is still being initialized
    // from the wrong thread: bail to the wrong-method stub in that case.
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  // Frame creation (and stack bang if needed); the last argument requests
  // the nmethod entry barrier for stub compilations.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1913
// Prologue size depends on too many flags to compute statically; measure the
// emitted code instead.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1939 framesize -= 2*wordSize;
1940
1941 if (framesize) {
1942 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1943 st->print("\t");
1944 }
1945
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
// Emits the method epilogue: optional vzeroupper, frame teardown, reserved
// stack check, and the return-site safepoint poll.
void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    __ addq(rsp, framesize);
  }

  __ popq(rbp);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: allocate the out-of-line safepoint stub and branch
      // to it when the poll triggers.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
  }
}
1996
// Epilogue size also depends on runtime flags; measure the emitted code.
uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}

const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}
2012
2013 //=============================================================================
2014
// Coarse register classes used by the spill-copy logic below to pick the
// right move sequence between locations.
enum RC {
  rc_bad,    // invalid/absent register
  rc_int,    // general-purpose register
  rc_kreg,   // AVX-512 opmask register
  rc_float,  // XMM register
  rc_stack   // stack slot
};
2022
2023 static enum RC rc_class(OptoReg::Name reg)
2024 {
2025 if( !OptoReg::is_valid(reg) ) return rc_bad;
2026
2027 if (OptoReg::is_stack(reg)) return rc_stack;
2028
2029 VMReg r = OptoReg::as_VMReg(reg);
2030
2031 if (r->is_Register()) return rc_int;
2032
2033 if (r->is_KRegister()) return rc_kreg;
2034
2035 assert(r->is_XMMRegister(), "must be");
2036 return rc_float;
2037 }
2038
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Register-to-register vector move.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector load from / store to a stack slot (direction chosen by is_load).
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);
2045
// Copies a vector value between two stack slots. With a non-null masm the
// instructions are emitted; otherwise (non-PRODUCT) the equivalent assembly
// is printed to 'st'. Small vectors go through a scratch GPR or push/pop
// pairs; Op_VecY/Op_VecZ bounce through xmm0, which is saved to and restored
// from scratch space below rsp.
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                          int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS: // 32-bit: bounce through rax, preserving it at [rsp - 8]
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD: // 64-bit: a single push/pop pair
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX: // 128-bit: two push/pop pairs
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY: // 256-bit: bounce through xmm0, saved at [rsp - 32]
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512-bit: bounce through xmm0, saved at [rsp - 64]
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): prints "vmovdqu" although the emitted instruction for
      // Op_VecZ is evmovdquq — debug listing only; confirm intended.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
2123
// Emit (when 'masm' is non-null) or pretty-print (when 'masm' is null and
// 'st' is non-null) the code for one register-allocator spill copy. Source
// and destination can each be a stack slot, a general-purpose register, an
// XMM register, or an AVX-512 opmask (k) register; vector copies are
// delegated to the vec_*_helper routines. Always returns 0 — the node's
// encoded size is computed separately by MachSpillCopyNode::size().
// 'do_size' is not used by this implementation.
uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(masm != nullptr || st != nullptr, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  // Classify each location: stack slot, int register, float (xmm) register,
  // or opmask (k) register.
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  // Vector copies (but not vector-mask copies) take a separate path.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  // Scalar copies. Throughout, the test
  //   (reg & 1) == 0 && reg + 1 == second
  // recognizes an even-aligned adjacent register pair, i.e. a 64-bit value;
  // anything else is moved as 32 bits.
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq [rsp + #%d]",
                    src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        // bounce the value through rax, preserving rax in the slot just
        // below rsp (red zone) across the copy.
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl rax, [rsp + #%d]\n\t"
                    "movl [rsp + #%d], rax\n\t"
                    "movq rax, [rsp - #8]",
                    src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s %s, [rsp + #%d]\t# spill",
                    UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, [rsp + #%d]\t# spill",
                    Matcher::regName[dst_first],
                    offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // gpr -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): this falls through to Unimplemented() unconditionally,
      // even after emitting the kmov above — gpr -> kreg spill copies are
      // apparently not expected to occur; confirm before relying on this path.
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss [rsp + #%d], %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (masm) {
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s %s, %s\t# spill",
                    UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    if (dst_first_rc == rc_stack) {
      // mem -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (masm) {
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq [rsp + #%d] , %s\t# spill",
                    offset,
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      // NOTE(review): like the gpr -> kreg case above, this path always ends
      // in Unimplemented(); kreg -> gpr spill copies are not expected here.
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (masm) {
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq %s, %s\t# spill",
                    Matcher::regName[dst_first],
                    Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      assert(false, "Illegal spill");
      return 0;
    }
  }

  // No register-class combination matched: unknown/unsupported spill copy.
  assert(0," foo ");
  Unimplemented();
  return 0;
}
2560
#ifndef PRODUCT
// Debug printing: render this spill copy as pseudo-assembly on 'st' without
// emitting any code (implementation() prints when masm is null).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation(nullptr, ra_, false, st);
}
#endif
2566
// Emit the machine code for this spill copy (implementation() emits when
// masm is non-null).
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation(masm, ra_, false, nullptr);
}
2570
// Encoded size of this spill copy. Too many instruction variants to predict
// analytically; delegate to the generic MachNode::size computation.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2574
2575 //=============================================================================
#ifndef PRODUCT
// Debug printing: a box lock materializes the address of its assigned
// monitor stack slot into a register via leaq.
void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("leaq %s, [rsp + #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
#endif
2585
// Emit the box lock: load the address of the monitor's stack slot into the
// register assigned to this node.
void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  __ lea(as_Register(reg), Address(rsp, offset));
}
2593
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
2612 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
// Emit the unverified entry point: the inline-cache check, aligned per
// InteriorEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
{
  __ ic_check(InteriorEntryAlignment);
}
2623
// Encoded size of the unverified entry point; depends on alignment padding
// and pointer-compression mode, so compute it generically.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
2629
2630
2631 //=============================================================================
2632
// Vector values may be passed/returned in registers whenever vector support
// is enabled.
bool Matcher::supports_vector_calling_convention(void) {
  return EnableVectorSupport;
}
2636
// True if the instruction selector flagged this node's first input operand
// as a candidate for APX NDD -> legacy-encoding demotion (see Node::PD).
static bool is_ndd_demotable_opr1(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
}
2640
// True if the instruction selector flagged this node's second input operand
// as a candidate for APX NDD -> legacy-encoding demotion (see Node::PD).
static bool is_ndd_demotable_opr2(const MachNode* mdef) {
  return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
}
2644
#ifdef ASSERT
// True if either input operand of 'mdef' is marked NDD-demotable
// (debug-only helper used in asserts below).
static bool is_ndd_demotable(const MachNode* mdef) {
  return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
}
#endif
2650
// Decide whether the register allocator should bias the definition of
// 'mdef' towards the register assigned to its input operand 'oper_index'
// (only indices 1 and 2 are supported). Sharing the register lets the
// assembler demote an APX NDD instruction to a shorter legacy encoding.
// Returns false whenever demotion could not pay off.
bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
                                            int oper_index) {
  if (mdef == nullptr) {
    return false;
  }

  // Reject nodes that lack such an input operand entirely; the asserts check
  // that no node marked demotable ever lands here.
  if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
      mdef->in(mdef->operand_index(oper_index)) == nullptr) {
    assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
    assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
    return false;
  }

  // Complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing def towards any address component will not
  // result in NDD demotion by assembler.
  if (mdef->operand_num_edges(oper_index) != 1) {
    return false;
  }

  // Demotion candidate must be register mask compatible with definition.
  const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
  if (!oper_mask.overlap(mdef->out_RegMask())) {
    assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
    return false;
  }

  switch (oper_index) {
  // First operand of MachNode corresponding to Intel APX NDD selection
  // pattern can share its assigned register with definition operand if
  // their live ranges do not overlap. In such a scenario we can demote
  // it to legacy map0/map1 instruction by replacing its 4-byte extended
  // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
  // are decorated with a special flag by instruction selector.
  case 1:
    return is_ndd_demotable_opr1(mdef);

  // Definition operand of commutative operation can be biased towards second
  // operand.
  case 2:
    return is_ndd_demotable_opr2(mdef);

  // Current scheme only selects up to two biasing candidates
  default:
    assert(false, "unhandled operand index: %s", mdef->Name());
    break;
  }

  return false;
}
2701
2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2703 assert(EnableVectorSupport, "sanity");
2704 int lo = XMM0_num;
2705 int hi = XMM0b_num;
2706 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2707 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2708 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2709 return OptoRegPair(hi, lo);
2710 }
2711
2712 // Is this branch offset short enough that a short branch can be used?
2713 //
2714 // NOTE: If the platform does not provide any short branch variants, then
2715 // this method should return false for offset 0.
2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2717 // The passed offset is relative to address of the branch.
2718 // On 86 a branch displacement is calculated relative to address
2719 // of a next instruction.
2720 offset -= br_size;
2721
2722 // the short version of jmpConUCF2 contains multiple branches,
2723 // making the reach slightly less
2724 if (rule == jmpConUCF2_rule)
2725 return (-126 <= offset && offset <= 125);
2726 return (-128 <= offset && offset <= 127);
2727 }
2728
#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
bool Matcher::can_be_java_arg(int reg)
{
  // Integer argument registers (either half of each 64-bit pair is accepted;
  // R12 is included in this list as well).
  const bool int_arg =
    reg == RDI_num || reg == RDI_H_num ||
    reg == RSI_num || reg == RSI_H_num ||
    reg == RDX_num || reg == RDX_H_num ||
    reg == RCX_num || reg == RCX_H_num ||
    reg == R8_num  || reg == R8_H_num  ||
    reg == R9_num  || reg == R9_H_num  ||
    reg == R12_num || reg == R12_H_num;

  // Floating-point argument registers xmm0-xmm7 (first or second slot).
  const bool fp_arg =
    reg == XMM0_num || reg == XMM0b_num ||
    reg == XMM1_num || reg == XMM1b_num ||
    reg == XMM2_num || reg == XMM2b_num ||
    reg == XMM3_num || reg == XMM3b_num ||
    reg == XMM4_num || reg == XMM4b_num ||
    reg == XMM5_num || reg == XMM5b_num ||
    reg == XMM6_num || reg == XMM6b_num ||
    reg == XMM7_num || reg == XMM7b_num;

  return int_arg || fp_arg;
}
#endif
2751
2752 uint Matcher::int_pressure_limit()
2753 {
2754 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2755 }
2756
2757 uint Matcher::float_pressure_limit()
2758 {
2759 // After experiment around with different values, the following default threshold
2760 // works best for LCM's register pressure scheduling on x64.
2761 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2762 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2763 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2764 }
2765
// Should long division by a constant use hand-written assembly instead of
// the strength-reduced multiply sequence? Never on x86_64.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code which uses a multiply when the divisor is constant
  // is faster than the hardware DIV instruction (it uses MulHiL).
  return false;
}
2772
// Register for DIVI projection of divmodI:
// x86 integer division produces the quotient in rax.
const RegMask& Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask();
}
2777
// Register for MODI projection of divmodI:
// x86 integer division produces the remainder in rdx.
const RegMask& Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask();
}
2782
// Register for DIVL projection of divmodL:
// the 64-bit quotient lands in rax.
const RegMask& Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask();
}
2787
// Register for MODL projection of divmodL:
// the 64-bit remainder lands in rdx.
const RegMask& Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask();
}
2792
2793 %}
2794
2795 source_hpp %{
2796 // Header information of the source block.
2797 // Method declarations/definitions which are used outside
2798 // the ad-scope can conveniently be defined here.
2799 //
2800 // To keep related declarations/definitions/uses close together,
2801 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2802
2803 #include "runtime/vm_version.hpp"
2804
2805 class NativeJump;
2806
// Call-stub bookkeeping for branch shortening. x86 calls can reach anywhere
// in the code cache, so no trampoline stubs are ever needed.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
2824
// Emission and sizing of the per-nmethod deoptimization handler stub.
class HandlerImpl {

 public:

  // Emit the deopt handler into the stub section; returns the handler's
  // entry offset (defined in the source %{ %} block below).
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  static uint size_deopt_handler() {
    // one call and one jmp.
    return 7;
  }
};
2836
2837 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2838 switch(bytes) {
2839 case 4: // fall-through
2840 case 8: // fall-through
2841 case 16: return Assembler::AVX_128bit;
2842 case 32: return Assembler::AVX_256bit;
2843 case 64: return Assembler::AVX_512bit;
2844
2845 default: {
2846 ShouldNotReachHere();
2847 return Assembler::AVX_NoVec;
2848 }
2849 }
2850 }
2851
// AVX encoding attribute for a vector node, derived from its length in bytes.
static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}
2855
// AVX encoding attribute for the vector flowing into operand 'opnd' of
// 'use': resolve the defining node of that operand edge and use its length.
static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}
2861
// True if the CPU has direct vector popcount support for element type 'bt':
// AVX512_BITALG covers byte/short, AVX512_VPOPCNTDQ covers int/long.
static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}
2866
// True if vector count-leading-zeros for int/long elements can use the EVEX
// form: requires AVX512CD, plus AVX512VL unless the vector is a full 512 bits.
static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}
2871
// x86-specific per-node flag bits, allocated above the shared Node flags.
// The Flag_sets_* / Flag_clears_* bits describe an instruction's effect on
// individual condition-code flags; the Flag_ndd_demotable_* bits mark APX
// NDD instructions whose definition may be biased onto an input register
// (see Matcher::is_register_biasing_candidate).
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    Flag_sets_carry_flag = Node::_last_flag << 2,
    Flag_sets_parity_flag = Node::_last_flag << 3,
    Flag_sets_zero_flag = Node::_last_flag << 4,
    Flag_sets_overflow_flag = Node::_last_flag << 5,
    Flag_sets_sign_flag = Node::_last_flag << 6,
    Flag_clears_carry_flag = Node::_last_flag << 7,
    Flag_clears_parity_flag = Node::_last_flag << 8,
    Flag_clears_zero_flag = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag = Node::_last_flag << 11,
    Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
    _last_flag = Flag_ndd_demotable_opr2
  };
};
2891
2892 %} // end source_hpp
2893
2894 source %{
2895
2896 #include "opto/addnode.hpp"
2897 #include "c2_intelJccErratum_x86.hpp"
2898
// Platform-dependent analysis before code emission: on CPUs affected by the
// Intel JCC erratum, tag the affected mach nodes and reserve extra code
// buffer space for the alignment padding they may require.
void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}
2905
2906 int MachNode::pd_alignment_required() const {
2907 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2908 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2909 return IntelJccErratum::largest_jcc_size() + 1;
2910 } else {
2911 return 1;
2912 }
2913 }
2914
2915 int MachNode::compute_padding(int current_offset) const {
2916 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2917 Compile* C = Compile::current();
2918 PhaseOutput* output = C->output();
2919 Block* block = output->block();
2920 int index = output->index();
2921 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2922 } else {
2923 return 0;
2924 }
2925 }
2926
2927 // Emit deopt handler code.
2928 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2929
2930 // Note that the code buffer's insts_mark is always relative to insts.
2931 // That's why we must use the macroassembler to generate a handler.
2932 address base = __ start_a_stub(size_deopt_handler());
2933 if (base == nullptr) {
2934 ciEnv::current()->record_failure("CodeCache is full");
2935 return 0; // CodeBuffer::expand failed
2936 }
2937 int offset = __ offset();
2938
2939 Label start;
2940 __ bind(start);
2941
2942 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2943
2944 int entry_offset = __ offset();
2945
2946 __ jmp(start);
2947
2948 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2949 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2950 "out of bounds read in post-call NOP check");
2951 __ end_a_stub();
2952 return entry_offset;
2953 }
2954
2955 static Assembler::Width widthForType(BasicType bt) {
2956 if (bt == T_BYTE) {
2957 return Assembler::B;
2958 } else if (bt == T_SHORT) {
2959 return Assembler::W;
2960 } else if (bt == T_INT) {
2961 return Assembler::D;
2962 } else {
2963 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2964 return Assembler::Q;
2965 }
2966 }
2967
2968 //=============================================================================
2969
// Float masks come from different places depending on platform.
// Thin accessors over the per-platform stub constants so the instruction
// encodings below can refer to them uniformly.
static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2990
2991 //=============================================================================
// Returns whether this platform can match the given ideal opcode at all,
// based only on the opcode and CPU/VM feature flags. Vector-length and
// element-type dependent restrictions are checked separately in
// match_rule_supported_vector().
bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false; // no match rule present
  }
  switch (opcode) {
    case Op_AbsVL:
    case Op_StoreVectorScatter:
      if (UseAVX < 3) {
        return false;
      }
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction) {
        return false;
      }
      break;
    case Op_PopCountVI:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
    case Op_PopCountVL:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulVL:
      if (UseSSE < 4) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false) {
        return false;
      }
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (UseSSE < 3) { // requires at least SSSE3
        return false;
      }
      break;
    case Op_MaxHF:
    case Op_MinHF:
      // Half-float min/max needs AVX512-VL/BW in addition to the FP16
      // requirement shared with the cases below.
      if (!VM_Version::supports_avx512vlbw()) {
        return false;
      } // fallthrough
    case Op_AddHF:
    case Op_DivHF:
    case Op_FmaHF:
    case Op_MulHF:
    case Op_ReinterpretS2HF:
    case Op_ReinterpretHF2S:
    case Op_SubHF:
    case Op_SqrtHF:
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
    case Op_MulReductionVI:
      if (UseSSE < 4) { // requires at least SSE4
        return false;
      }
      break;
    case Op_IsInfiniteF:
    case Op_IsInfiniteD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
    case Op_VectorMaskCmp:
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
    case Op_VectorCastL2X:
    case Op_VectorCastF2X:
    case Op_VectorCastD2X:
    case Op_VectorUCastB2X:
    case Op_VectorUCastS2X:
    case Op_VectorUCastI2X:
    case Op_VectorMaskCast:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_PopulateIndex:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_RoundVF:
      if (UseAVX < 2) { // enabled for AVX2 only
        return false;
      }
      break;
    case Op_RoundVD:
      if (UseAVX < 3) {
        return false;  // enabled for AVX3 only
      }
      break;
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false) {
        return false;
      }
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
    case Op_VectorInsert:
    case Op_VectorLoadMask:
    case Op_VectorStoreMask:
    case Op_VectorBlend:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        return false;
      }
      break;
    case Op_ExtractB:
    case Op_ExtractL:
    case Op_ExtractI:
    case Op_RoundDoubleMode:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_RoundDoubleModeV:
      if (VM_Version::supports_avx() == false) {
        return false; // 128bit vroundpd is not available
      }
      break;
    case Op_LoadVectorGather:
    case Op_LoadVectorGatherMasked:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_FmaF:
    case Op_FmaD:
    case Op_FmaVD:
    case Op_FmaVF:
      if (!UseFMA) {
        return false;
      }
      break;
    case Op_MacroLogicV:
      if (UseAVX < 3 || !UseVectorMacroLogic) {
        return false;
      }
      break;

    case Op_VectorCmpMasked:
    case Op_VectorMaskGen:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_VectorMaskFirstTrue:
    case Op_VectorMaskLastTrue:
    case Op_VectorMaskTrueCount:
    case Op_VectorMaskToLong:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_RoundF:
    case Op_RoundD:
      break;
    case Op_CopySignD:
    case Op_CopySignF:
      if (UseAVX < 3) {
        return false;
      }
      if (!VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_CompressBits:
    case Op_ExpandBits:
      // Scalar pext/pdep.
      if (!VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressM:
      if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_ConvF2HF:
    case Op_ConvHF2F:
      if (!VM_Version::supports_float16()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      // Either F16C or the EVEX form is sufficient here; the VL requirement
      // for sub-512-bit EVEX is checked in match_rule_supported_vector().
      if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
        return false;
      }
      break;
  }
  return true;  // Match rules are supported by default.
}
3239
3240 //------------------------------------------------------------------------
3241
3242 static inline bool is_pop_count_instr_target(BasicType bt) {
3243 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3244 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3245 }
3246
// Auto-vectorization imposes no extra restrictions on x86 beyond the
// generic vector match rule checks.
bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}
3250
3251 // Identify extra cases that we might want to provide match rules for vector nodes and
3252 // other intrinsics guarded with vector length (vlen) and element type (bt).
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  // * SSE2 supports 128bit vectors for all types;
  // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  // * AVX2 supports 256bit vectors for all types;
  // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  // * implementation limitations
  // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  // * 128bit vroundpd instruction is present only in AVX1
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_MaxVHF:
    case Op_MinVHF:
      // Vector half-float min/max additionally requires AVX512BW.
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      // fallthrough
    case Op_AddVHF:
    case Op_DivVHF:
    case Op_FmaVHF:
    case Op_MulVHF:
    case Op_SubVHF:
    case Op_SqrtVHF:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false;
      } // fallthrough
    case Op_MacroLogicV:
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_UMinV:
    case Op_UMaxV:
      if (UseAVX == 0) {
        return false;
      }
      break;
    case Op_UMinReductionV:
    case Op_UMaxReductionV:
      if (UseAVX == 0) {
        return false;
      }
      if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
        // Float/Double intrinsics are enabled for AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
      break;
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
      break;
    case Op_VectorBlend:
      if (UseAVX == 0 && size_in_bits < 128) {
        return false;
      }
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if(vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
    case Op_VectorMaskCast:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_PopulateIndex:
      if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
      if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X: {
        // As per JLS section 5.1.3 narrowing conversion to sub-word types
        // happen after intermediate conversion to integer and special handling
        // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
        int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
        if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
          return false;
        }
      }
      // fallthrough
    case Op_VectorCastD2X:
      if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() &&
         ((!VM_Version::supports_evex() ||
         ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
        return false;
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
      if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (is_subword_type(bt) &&
         ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
          (size_in_bits < 64) ||
          (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
        return false;
      }
      break;
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (!is_subword_type(bt) && size_in_bits == 64) {
        return false;
      }
      if (is_subword_type(bt) && size_in_bits < 64) {
        return false;
      }
      break;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (UseAVX < 1) {
        return false; // Implementation limitation
      }
      if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (size_in_bits < 128 ) {
        return false;
      }
      // no break: CompressV/ExpandV are also subject to the AVX/BMI2 checks
      // shared with Op_VectorLongToMask below.
    case Op_VectorLongToMask:
      if (UseAVX < 1) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SignumVD:
    case Op_SignumVF:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
        if (!is_pop_count_instr_target(bt) &&
            (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
          return false;
        }
      }
      break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true;  // Per default match rules are supported.
}
3598
bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // ADLC based match_rule_supported routine checks for the existence of pattern based
  // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
  // of their non-masked counterpart with mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation patterns
  // by returning a default false value for all the other opcodes apart from the
  // ones whose masked instruction patterns are defined in this file.
  if (!match_rule_supported_vector(opcode, vlen, bt)) {
    return false;
  }

  // Masked (predicated) operations use EVEX encodings; sub-512-bit vectors
  // additionally require AVX512VL.
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
    return false;
  }
  switch(opcode) {
    // Unary masked operations
    case Op_AbsVB:
    case Op_AbsVS:
      if(!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough
    case Op_AbsVI:
    case Op_AbsVL:
      return true;

    // Ternary masked operations
    case Op_FmaVF:
    case Op_FmaVD:
      return true;

    case Op_MacroLogicV:
      if(bt != T_INT && bt != T_LONG) {
        return false;
      }
      return true;

    // Binary masked operations
    case Op_AddVB:
    case Op_AddVS:
    case Op_SubVB:
    case Op_SubVS:
    case Op_MulVS:
    case Op_LShiftVS:
    case Op_RShiftVS:
    case Op_URShiftVS:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_MulVL:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512dq()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_AndV:
    case Op_OrV:
    case Op_XorV:
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorLoadMask:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      return true;

    case Op_AddVI:
    case Op_AddVL:
    case Op_AddVF:
    case Op_AddVD:
    case Op_SubVI:
    case Op_SubVL:
    case Op_SubVF:
    case Op_SubVD:
    case Op_MulVI:
    case Op_MulVF:
    case Op_MulVD:
    case Op_DivVF:
    case Op_DivVD:
    case Op_SqrtVF:
    case Op_SqrtVD:
    case Op_LShiftVI:
    case Op_LShiftVL:
    case Op_RShiftVI:
    case Op_RShiftVL:
    case Op_URShiftVI:
    case Op_URShiftVL:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
      return true;

    case Op_UMinV:
    case Op_UMaxV:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      } // fallthrough
    case Op_MaxV:
    case Op_MinV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
        return false; // Implementation limitation
      }
      return true;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (!is_subword_type(bt)) {
        return false;
      }
      if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorMaskCmp:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorRearrange:
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
      return true;

    // Binary Logical operations
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      if (vlen > 16 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt)) {
        return false;
      }
      return true;

    case Op_MaskAll:
      return true;

    case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
      // fallthrough into default: all other cases are unsupported
    default:
      return false;
  }
}
3770
// x86 vectors are fixed-size; no operation ever needs to be decomposed
// into partial (tail) operations.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
3774
3775 // Return true if Vector::rearrange needs preparation of the shuffle argument
3776 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3777 switch (elem_bt) {
3778 case T_BYTE: return false;
3779 case T_SHORT: return !VM_Version::supports_avx512bw();
3780 case T_INT: return !VM_Version::supports_avx();
3781 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3782 default:
3783 ShouldNotReachHere();
3784 return false;
3785 }
3786 }
3787
3788 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3789 // Prefer predicate if the mask type is "TypeVectMask".
3790 return vt->isa_vectmask() != nullptr;
3791 }
3792
3793 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3794 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3795 bool legacy = (generic_opnd->opcode() == LEGVEC);
3796 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3797 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3798 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3799 return new legVecZOper();
3800 }
3801 if (legacy) {
3802 switch (ideal_reg) {
3803 case Op_VecS: return new legVecSOper();
3804 case Op_VecD: return new legVecDOper();
3805 case Op_VecX: return new legVecXOper();
3806 case Op_VecY: return new legVecYOper();
3807 case Op_VecZ: return new legVecZOper();
3808 }
3809 } else {
3810 switch (ideal_reg) {
3811 case Op_VecS: return new vecSOper();
3812 case Op_VecD: return new vecDOper();
3813 case Op_VecX: return new vecXOper();
3814 case Op_VecY: return new vecYOper();
3815 case Op_VecZ: return new vecZOper();
3816 }
3817 }
3818 ShouldNotReachHere();
3819 return nullptr;
3820 }
3821
// True when the Mach node is a pure register-to-register move between the
// vector/float/double register files, as selected by the listed ADL rules.
bool Matcher::is_reg2reg_move(MachNode* m) {
  switch (m->rule()) {
    case MoveVec2Leg_rule:
    case MoveLeg2Vec_rule:
    case MoveF2VL_rule:
    case MoveF2LEG_rule:
    case MoveVL2F_rule:
    case MoveLEG2F_rule:
    case MoveD2VL_rule:
    case MoveD2LEG_rule:
    case MoveVL2D_rule:
    case MoveLEG2D_rule:
      return true;
    default:
      return false;
  }
}
3839
3840 bool Matcher::is_generic_vector(MachOper* opnd) {
3841 switch (opnd->opcode()) {
3842 case VEC:
3843 case LEGVEC:
3844 return true;
3845 default:
3846 return false;
3847 }
3848 }
3849
3850 //------------------------------------------------------------------------
3851
// Register mask covering the KRegister (opmask) predicate registers.
const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}
3855
3856 // Max vector size in bytes. 0 if not supported.
3857 int Matcher::vector_width_in_bytes(BasicType bt) {
3858 assert(is_java_primitive(bt), "only primitive type vectors");
3859 // SSE2 supports 128bit vectors for all types.
3860 // AVX2 supports 256bit vectors for all types.
3861 // AVX2/EVEX supports 512bit vectors for all types.
3862 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3863 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3864 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3865 size = (UseAVX > 2) ? 64 : 32;
3866 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3867 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3868 // Use flag to limit vector size.
3869 size = MIN2(size,(int)MaxVectorSize);
3870 // Minimum 2 values in vector (or 4 for bytes).
3871 switch (bt) {
3872 case T_DOUBLE:
3873 case T_LONG:
3874 if (size < 16) return 0;
3875 break;
3876 case T_FLOAT:
3877 case T_INT:
3878 if (size < 8) return 0;
3879 break;
3880 case T_BOOLEAN:
3881 if (size < 4) return 0;
3882 break;
3883 case T_CHAR:
3884 if (size < 4) return 0;
3885 break;
3886 case T_BYTE:
3887 if (size < 4) return 0;
3888 break;
3889 case T_SHORT:
3890 if (size < 4) return 0;
3891 break;
3892 default:
3893 ShouldNotReachHere();
3894 }
3895 return size;
3896 }
3897
3898 // Limits on vector size (number of elements) loaded into vector.
3899 int Matcher::max_vector_size(const BasicType bt) {
3900 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3901 }
3902 int Matcher::min_vector_size(const BasicType bt) {
3903 int max_size = max_vector_size(bt);
3904 // Min size which can be loaded into vector is 4 bytes.
3905 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3906 // Support for calling svml double64 vectors
3907 if (bt == T_DOUBLE) {
3908 size = 1;
3909 }
3910 return MIN2(size,max_size);
3911 }
3912
3913 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3914 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3915 // by default on Cascade Lake
3916 if (VM_Version::is_default_intel_cascade_lake()) {
3917 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3918 }
3919 return Matcher::max_vector_size(bt);
3920 }
3921
// x86 has no scalable (length-agnostic) vector registers; -1 means unsupported.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
3925
3926 // Vector ideal reg corresponding to specified size in bytes
3927 uint Matcher::vector_ideal_reg(int size) {
3928 assert(MaxVectorSize >= size, "");
3929 switch(size) {
3930 case 4: return Op_VecS;
3931 case 8: return Op_VecD;
3932 case 16: return Op_VecX;
3933 case 32: return Op_VecY;
3934 case 64: return Op_VecZ;
3935 }
3936 ShouldNotReachHere();
3937 return 0;
3938 }
3939
// Check for shift by small constant as well
// Folds (x << con), con <= 3, into an addressing expression as a scaled
// index. If the shifted value is a non-negative ConvI2L, the conversion is
// bypassed so the matcher can use the int index directly. Returns true if
// the shift was claimed as part of an address; the caller must then not
// schedule it separately.
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
    // Allow Matcher to match the rule which bypass
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else {
      mstack.push(conv, Matcher::Pre_Visit);
    }
    return true;
  }
  return false;
}
3965
3966 // This function identifies sub-graphs in which a 'load' node is
3967 // input to two different nodes, and such that it can be matched
3968 // with BMI instructions like blsi, blsr, etc.
3969 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3970 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3971 // refers to the same node.
3972 //
3973 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3974 // This is a temporary solution until we make DAGs expressible in ADL.
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node; // root logic node (e.g. AndI/AndL/XorI/XorL)
  Node* _mop_node; // the shared memory (load) node
  int _con_op;     // opcode of the expected constant node (Op_ConI/Op_ConL)

  // Returns the input index (1 or 2) of 'n' whose opcode is 'next_op', or
  // -1 if not found. With next_op_idx == -1 the node is treated as
  // commutative and both inputs are tried; otherwise only the given index
  // is checked.
  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == nullptr || n->in(2) == nullptr) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }

 public:
  FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  // Matches the shape (op1 (op2 con mop) mop) where both 'mop' edges refer
  // to the same node and the constant's value equals 'con_value'.
  bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    // The shared load may have at most two users (op1 and op2).
    if (_mop_node->outcnt() > 2) {
      return false;
    }
    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    if (op1_op2_idx == -1) {
      return false;
    }
    // Memory operation must be the other edge
    int op1_mop_idx = (op1_op2_idx & 1) + 1;

    // Check that the mop node is really what we want
    if (_op1_node->in(op1_mop_idx) == _mop_node) {
      Node* op2_node = _op1_node->in(op1_op2_idx);
      // op2 must feed only op1, otherwise it has to stay materialized.
      if (op2_node->outcnt() > 1) {
        return false;
      }
      assert(op2_node->Opcode() == op2, "Should be");
      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
      if (op2_con_idx == -1) {
        return false;
      }
      // Memory operation must be the other edge
      int op2_mop_idx = (op2_con_idx & 1) + 1;
      // Check that the memory operation is the same node
      if (op2_node->in(op2_mop_idx) == _mop_node) {
        // Now check the constant
        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
          return true;
        }
      }
    }
    return false;
  }
};
4046
4047 static bool is_bmi_pattern(Node* n, Node* m) {
4048 assert(UseBMI1Instructions, "sanity");
4049 if (n != nullptr && m != nullptr) {
4050 if (m->Opcode() == Op_LoadI) {
4051 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4052 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4053 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4054 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4055 } else if (m->Opcode() == Op_LoadL) {
4056 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4057 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4058 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4059 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4060 }
4061 }
4062 return false;
4063 }
4064
4065 // Should the matcher clone input 'm' of node 'n'?
4066 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4067 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4068 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4069 mstack.push(m, Visit);
4070 return true;
4071 }
4072 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4073 mstack.push(m, Visit); // m = ShiftCntV
4074 return true;
4075 }
4076 if (is_encode_and_store_pattern(n, m)) {
4077 mstack.push(m, Visit);
4078 return true;
4079 }
4080 return false;
4081 }
4082
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP's inputs were pushed here (claimed for the
// addressing expression); the caller then skips its normal traversal.
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        !adr->in(AddPNode::Offset)->is_Con() &&
        off->get_long() == (int) (off->get_long()) && // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      // Fold a small-constant shift into the address as a scale, if possible.
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    // Non-constant offset that is a small shift: use it as a scaled index.
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
4124
4125 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4126 switch (bt) {
4127 case BoolTest::eq:
4128 return Assembler::eq;
4129 case BoolTest::ne:
4130 return Assembler::neq;
4131 case BoolTest::le:
4132 case BoolTest::ule:
4133 return Assembler::le;
4134 case BoolTest::ge:
4135 case BoolTest::uge:
4136 return Assembler::nlt;
4137 case BoolTest::lt:
4138 case BoolTest::ult:
4139 return Assembler::lt;
4140 case BoolTest::gt:
4141 case BoolTest::ugt:
4142 return Assembler::nle;
4143 default : ShouldNotReachHere(); return Assembler::_false;
4144 }
4145 }
4146
4147 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4148 switch (bt) {
4149 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4150 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4151 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4152 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4153 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4154 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4155 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4156 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4157 }
4158 }
4159
// Helper methods for MachSpillCopyNode::implementation().

// Emit (masm != nullptr) or, in debug builds, pretty-print a vector
// register-to-register copy of the width implied by 'ireg'
// (Op_VecS .. Op_VecZ). src/dst are allocator register numbers; any
// width above VecS must occupy an adjacent, even-aligned register pair.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves" );
  if (masm) {
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        // AVX-512 without AVX512VL: use an EVEX-encodable extract of
        // lane 0 instead of a VEX-encoded 128-bit move.
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
      break;
    case Op_VecY:
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        // Same EVEX workaround as above, for 256-bit copies.
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // No assembler supplied: print the equivalent instruction text.
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
4209
// Emit (masm != nullptr) or, in debug builds, pretty-print a vector
// spill: load from or store to [rsp + stack_offset], with the access
// width given by 'ireg' (Op_VecS .. Op_VecZ).
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st) {
  if (masm) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // AVX-512 without AVX512VL: zero the register, then insert the
          // 128-bit data from the stack via an EVEX-encodable insert.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // Same EVEX workaround, for 256-bit loads.
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          // AVX-512 without AVX512VL: EVEX-encodable extract to memory.
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecY:
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        }
        else {
          // Same EVEX workaround, for 256-bit stores.
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#ifndef PRODUCT
  } else {
    // No assembler supplied: print the equivalent instruction text.
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
}
4316
// Build the raw byte image of a vector constant: 'len' lanes of element
// type 'bt', each holding the value 'con'. Lane bytes are written in the
// platform's native (little-endian on x86) order via memcpy. Returns a
// newly allocated GrowableArray of 'size' zero-initialized bytes.
template <class T>
static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
  int size = type2aelembytes(bt) * len;
  GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
  for (int i = 0; i < len; i++) {
    int offset = i * type2aelembytes(bt);
    switch (bt) {
      case T_BYTE: val->at(i) = con; break; // 1-byte lanes: offset == i
      case T_SHORT: {
        jshort c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jshort));
        break;
      }
      case T_INT: {
        jint c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jint));
        break;
      }
      case T_LONG: {
        jlong c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jlong));
        break;
      }
      case T_FLOAT: {
        jfloat c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jfloat));
        break;
      }
      case T_DOUBLE: {
        jdouble c = con;
        memcpy(val->adr_at(offset), &c, sizeof(jdouble));
        break;
      }
      default: assert(false, "%s", type2name(bt));
    }
  }
  return val;
}
4355
4356 static inline jlong high_bit_set(BasicType bt) {
4357 switch (bt) {
4358 case T_BYTE: return 0x8080808080808080;
4359 case T_SHORT: return 0x8000800080008000;
4360 case T_INT: return 0x8000000080000000;
4361 case T_LONG: return 0x8000000000000000;
4362 default:
4363 ShouldNotReachHere();
4364 return 0;
4365 }
4366 }
4367
#ifndef PRODUCT
// Debug printout for a nop-padding pseudo instruction of _count bytes.
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
4373
void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
  // Emit _count bytes of nop padding.
  __ nop(_count);
}
4377
uint MachNopNode::size(PhaseRegAlloc*) const {
  // A nop pad occupies exactly _count bytes.
  return _count;
}
4381
#ifndef PRODUCT
// Debug printout for a breakpoint pseudo instruction.
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif
4387
void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
  // int3 raises a debug trap when executed.
  __ int3();
}
4391
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  // Defer to the generic MachNode size computation.
  return MachNode::size(ra_);
}
4395
4396 %}
4397
4398 //----------ENCODING BLOCK-----------------------------------------------------
4399 // This block specifies the encoding classes used by the compiler to
4400 // output byte streams. Encoding classes are parameterized macros
4401 // used by Machine Instruction Nodes in order to generate the bit
4402 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4406 // which returns its register number when queried. CONST_INTER causes
4407 // an operand to generate a function which returns the value of the
4408 // constant when queried. MEMORY_INTER causes an operand to generate
4409 // four functions which return the Base Register, the Index Register,
4410 // the Scale Value, and the Offset Value of the operand when queried.
4411 // COND_INTER causes an operand to generate six functions which return
4412 // the encoding code (ie - encoding bits for the instruction)
4413 // associated with each basic boolean condition for a conditional
4414 // instruction.
4415 //
4416 // Instructions specify two basic values for encoding. Again, a
4417 // function is available to check if the constant displacement is an
4418 // oop. They use the ins_encode keyword to specify their encoding
4419 // classes (which must be a sequence of enc_class names, and their
4420 // parameters, specified in the encoding block), and they use the
4421 // opcode keyword to specify, in order, their primary, secondary, and
4422 // tertiary opcode. Only the opcode sections which a particular
4423 // instruction needs for encoding need to be specified.
4424 encode %{
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    // (idiv raises #DE on min_int / -1, so that case is handled inline.)
    //
    //         normal case                      special case
    //
    // input : rax: dividend                    min_int
    //         reg: divisor                     -1
    //
    // output: rax: quotient  (= rax idiv reg)  min_int
    //         rdx: remainder (= rax irem reg)  0
    //
    // Code sequence:
    //
    //    0:   3d 00 00 00 80     cmp    $0x80000000,%eax
    //    5:   75 07/08           jne    e <normal>
    //    7:   33 d2              xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff           cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04           je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                 cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9              idiv   $div
    // 0000000000000011 <done>:
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idivl
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}
4481
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    // (idiv raises #DE on min_long / -1, so that case is handled inline.)
    //
    //         normal case                      special case
    //
    // input : rax: dividend                    min_long
    //         reg: divisor                     -1
    //
    // output: rax: quotient  (= rax idiv reg)  min_long
    //         rdx: remainder (= rax irem reg)  0
    //
    // Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00   mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0               cmp    %rdx,%rax
    //    d:   75 08                  jne    17 <normal>
    //    f:   33 d2                  xor    %edx,%edx
    //   11:   48 83 f9 ff            cmp    $0xffffffffffffffff,$div
    //   15:   74 05                  je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                  cqto
    //   19:   48 f7 f9               idiv   $div
    // 000000000000001c <done>:
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1c <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idivq
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}
4539
  enc_class clear_avx %{
    DEBUG_ONLY(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
    DEBUG_ONLY(int off1 = __ offset());
    // clear_avx_size() must predict exactly how many bytes this emits.
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}
4551
  enc_class Java_To_Runtime(method meth) %{
    // Call a runtime entry point through r10; the lea/call-indirect pair
    // supports a full 64-bit target address.
    __ lea(r10, RuntimeAddress((address)$meth$$method));
    __ call(r10);
    __ post_call_nop();
  %}
4557
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      // No Java method: plain direct call to a runtime entry.
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      // Regular Java static (or optimized virtual) call with relocation
      // info recording the resolved method index.
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      address mark = __ pc();
      int call_offset = __ offset();
      __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, call_offset);
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          // Bail out of this compilation; the code cache is out of room.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    __ post_call_nop();
  %}
4593
  enc_class Java_Dynamic_Call(method meth) %{
    // Inline-cache call; relocation records the resolved method index.
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
4598
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack.
      // (The 0xbadb100d cookie is presumably planted by the frame setup
      // when VerifyStackAtCalls is on -- confirm against the prologue.)
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
4611
4612 %}
4613
4614 //----------FRAME--------------------------------------------------------------
4615 // Definition of frame structure and management information.
4616 //
4617 // S T A C K L A Y O U T Allocators stack-slot number
4618 // | (to get allocators register number
4619 // G Owned by | | v add OptoReg::stack0())
4620 // r CALLER | |
4621 // o | +--------+ pad to even-align allocators stack-slot
4622 // w V | pad0 | numbers; owned by CALLER
4623 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4624 // h ^ | in | 5
4625 // | | args | 4 Holes in incoming args owned by SELF
4626 // | | | | 3
4627 // | | +--------+
4628 // V | | old out| Empty on Intel, window on Sparc
4629 // | old |preserve| Must be even aligned.
4630 // | SP-+--------+----> Matcher::_old_SP, even aligned
4631 // | | in | 3 area for Intel ret address
4632 // Owned by |preserve| Empty on Sparc.
4633 // SELF +--------+
4634 // | | pad2 | 2 pad to align old SP
4635 // | +--------+ 1
4636 // | | locks | 0
4637 // | +--------+----> OptoReg::stack0(), even aligned
4638 // | | pad1 | 11 pad to align new SP
4639 // | +--------+
4640 // | | | 10
4641 // | | spills | 9 spills
4642 // V | | 8 (pad0 slot for callee)
4643 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4644 // ^ | out | 7
4645 // | | args | 6 Holes in outgoing args owned by CALLEE
4646 // Owned by +--------+
4647 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4648 // | new |preserve| Must be even-aligned.
4649 // | SP-+--------+----> Matcher::_new_SP, even aligned
4650 // | | |
4651 //
4652 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4653 // known from SELF's arguments and the Java calling convention.
4654 // Region 6-7 is determined per call site.
4655 // Note 2: If the calling convention leaves holes in the incoming argument
4656 // area, those holes are owned by SELF. Holes in the outgoing area
4657 // are owned by the CALLEE. Holes should not be necessary in the
4658 // incoming area, as the Java calling convention is completely under
4659 // the control of the AD file. Doubles can be sorted and packed to
4660 // avoid holes. Holes in the outgoing arguments may be necessary for
4661 // varargs C calling conventions.
4662 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4663 // even aligned with pad0 as needed.
4664 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4665 // region 6-11 is even aligned; it may be padded out more so that
4666 // the region from SP to FP meets the minimum stack alignment.
4667 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4668 // alignment. Region 11, pad1, may be dynamically extended so that
4669 // SP meets the minimum alignment.
4670
frame
%{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX); // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return register pair, indexed by ideal register type.
    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    // High half; OptoReg::Bad where the value fits a single register.
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
4736
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
// (Values given here are defaults; individual instructions may override.)
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
4758
4759 //----------OPERANDS-----------------------------------------------------------
4760 // Operand definitions must precede instruction definitions for correct parsing
4761 // in the ADLC because operands constitute user defined types which are used in
4762 // instruction definitions.
4763
4764 //----------Simple Operands----------------------------------------------------
4765 // Immediate Operands
4766 // Integer Immediate
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int constant 2
operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int constant 4
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes (0..3)
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate, 7-bit unsigned (0..0x7F)
operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate, 8-bit signed (-0x80..0x7F)
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate, 8-bit unsigned (0..255)
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate, 16-bit signed
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4899
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed (narrow) Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Compressed Klass Pointer Immediate
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Null Pointer Immediate
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate that fits in 31 bits and needs no relocation
operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4958
4959
// Long Immediate
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 8-bit
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit unsigned
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit signed
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate with exactly one bit set (power of 2 as unsigned)
operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate with exactly one bit clear (complement is a power of 2)
operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: 2^n-1, positive (i.e. a contiguous low-bit mask)
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
5076
// Float Immediate zero
operand immF0()
%{
  // Bit-pattern compare: matches +0.0f only; -0.0f (0x80000000) is excluded.
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Half Float Immediate
operand immH()
%{
  match(ConH);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0()
%{
  // Bit-pattern compare: matches +0.0d only; -0.0d is excluded.
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
5128
// Immediates for special shifts (sign extend)

// Shift amounts for sign extension
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
5189
// AOT Runtime Constants Address
operand immAOTRuntimeConstantsAddress()
%{
  // Matches only pointer constants lying inside the AOT runtime
  // constants area.
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5201
// Vector mask (opmask) register operand
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
5209
// Register Operands
// Integer Register
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}

operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}

operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}

operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}

// Integer register excluding rax and rdx; used by encodings (e.g.
// cdql_enc) where rax/rdx are implicitly read and written by idiv.
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding rbp and r13 -- presumably because their
// base encodings require a displacement byte; confirm at use sites.
operand no_rbp_r13_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
5305
// Pointer Register
// Matches every pointer register, including those excluded from ptr_reg
// (see the Q&A comment below about r15_RegP and rbp_RegP).
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP); // See Q&A below about
  match(r15_RegP); // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}

// Compressed (narrow) oop register; narrow oops fit in 32-bit integer registers.
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
5345
5346 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5347 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5348 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5349 // The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator when choosing the instruction's output register.
5353 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5354 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5355 // result, RBP is not included in the output of the instruction either.
5356
// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
5370
// Special Registers
// Return a pointer value
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a compressed pointer value
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// RBP; usable as a general pointer register only when it is not reserved
// as the frame pointer (see the Q&A comment above).
operand rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Used in rep stosq
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// R15, the read-only TLS register (see the Q&A comment above).
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
5446
// Long (64-bit) integer register
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Long register excluding RAX and RDX.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand r11_RegL()
%{
  constraint(ALLOC_IN_RC(long_r11_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long register excluding RBP and R13.
operand no_rbp_r13_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  match(RegL);
  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
5521
// Flags register, used as output of compare instructions
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags for unordered FP compares that need no fixup; selected only when
// the APX + AVX10.2 path (rFlagsRegUCFE below) is not available.
operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(!UseAPX || !VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Counterpart of rFlagsRegUCF, selected when APX and AVX10.2 are available.
// The two predicates are mutually exclusive, so exactly one operand applies.
operand rFlagsRegUCFE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(UseAPX && VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CFE" %}
  interface(REG_INTER);
%}
5559
// Float register operands
operand regF() %{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Float register operands, restricted to the legacy register class.
operand legRegF() %{
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Float register operands, allocated from the "vl" register class.
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand regD() %{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands, restricted to the legacy register class.
operand legRegD() %{
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

// Double register operands, allocated from the "vl" register class.
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}
5613
5614 //----------Memory Operands----------------------------------------------------
5615 // Direct Memory Operand
5616 // operand direct(immP addr)
5617 // %{
5618 // match(addr);
5619
5620 // format %{ "[$addr]" %}
5621 // interface(MEMORY_INTER) %{
5622 // base(0xFFFFFFFF);
5623 // index(0x4);
5624 // scale(0x0);
5625 // disp($addr);
5626 // %}
5627 // %}
5628
// Indirect Memory Operand
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // 0x4 encodes "no index register" (RSP cannot be an index)
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(any_RegP reg, rRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Operand
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // The int index (input of ConvI2L) must be known non-negative:
  // the low bound of its long type is >= 0.
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP reg (LShiftL (ConvI2L idx) scale));

  op_cost(10);
  format %{"[$reg + pos $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Plus Positive Index Register Plus Offset Operand
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // idx must be known non-negative (see indPosIndexScale).
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // idx must be known non-negative (see indPosIndexScale).
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5787
// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == nullptr.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3); // shift by 3 == CompressedOops::shift() (times_8)
    disp($off);
  %}
%}

// Indirect Memory Operand
// (narrow oop base; only legal when decoding is a no-op, i.e. shift == 0)
operand indirectNarrow(rRegN reg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8Narrow(rRegN reg, immL8 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32Narrow(rRegN reg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4); // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndexNarrow(rRegN reg, rRegL lreg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Plus Positive Index Register Plus Offset Operand
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // idx must be known non-negative (see indPosIndexScale).
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  // idx must be known non-negative (see indPosIndexScale).
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
5955
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6028
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed conditions)
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xc, "l");
    greater_equal(0xd, "ge");
    less_equal(0xe, "le");
    greater(0xf, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that don't require any fixup for the unordered case,
// If both inputs of the comparison are the same, ZF is always set so we
// don't need to use cmpOpUCF2 for eq/ne
operand cmpOpUCF() %{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt ||
             n->in(1)->in(1) == n->in(1)->in(2)));
  format %{ "" %}
  interface(COND_INTER) %{
    // eq/ne are encoded via the parity flag ("np"/"p") rather than ZF.
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating point comparisons that set condition flags to test more directly,
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction because the preceding comparison might be based on a three way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
operand cmpOpUCFE()
%{
  match(Bool);
  // Only selected on the APX + AVX10.2 path (complements cmpOpUCF/cmpOpUCF2).
  predicate((UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq ||
             n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt));

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
6157
// Operands for bound floating pointer register arguments
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}

// Vectors

// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
6287
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format. The classic
// case of this is memory operands.

opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6301
6302 //----------PIPELINE-----------------------------------------------------------
6303 // Rules which define the behavior of the target architectures pipeline.
6304 pipeline %{
6305
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable-size (x86) instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes
%}
6314
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU op, only ALU0 handles mul instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
           BR, FPU,
           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline: six stages S0..S5
pipe_desc(S0, S1, S2, S3, S4, S5);
6333
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst)
%{
  single_instruction;
  dst : S4(write);
  dst : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(rRegL dst)
%{
  instruction_count(2);
  dst : S4(write);
  dst : S3(read);
  DECODE : S0(2); // any 2 decoders
  ALU : S3(2); // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst)
%{
  single_instruction;
  dst : S4(write);
  dst : S3(read);
  D0 : S0; // big decoder only
  ALU : S3; // any alu
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  DECODE : S0; // any decoder
  ALU : S3; // any alu
%}

// Integer ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
  single_instruction;
  dst : S4(write);
  src : S3(read);
  D0 : S0; // big decoder only
  ALU : S3; // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
  single_instruction;
  dst : S5(write);
  mem : S3(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3; // any mem
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
  single_instruction;
  mem : S3(read);
  D0 : S0; // big decoder only
  MEM : S3; // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
  single_instruction;
  mem : S3(read);
  src : S5(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3;
%}

// // Long Store to Memory
// pipe_class ialu_mem_long_reg(memory mem, rRegL src)
// %{
//   instruction_count(2);
//   mem : S3(read);
//   src : S5(read);
//   D0 : S0(2); // big decoder only; twice
//   ALU : S4(2); // any 2 alus
//   MEM : S3(2); // Both mems
// %}

// Integer Store of immediate to Memory
pipe_class ialu_mem_imm(memory mem)
%{
  single_instruction;
  mem : S3(read);
  D0 : S0; // big decoder only
  ALU : S4; // any alu
  MEM : S3;
%}
6446
6447 // Integer ALU0 reg-reg operation
6448 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6449 %{
6450 single_instruction;
6451 dst : S4(write);
6452 src : S3(read);
6453 D0 : S0; // Big decoder only
6454 ALU0 : S3; // only alu0
6455 %}
6456
6457 // Integer ALU0 reg-mem operation
6458 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6459 %{
6460 single_instruction;
6461 dst : S5(write);
6462 mem : S3(read);
6463 D0 : S0; // big decoder only
6464 ALU0 : S4; // ALU0 only
6465 MEM : S3; // any mem
6466 %}
6467
6468 // Integer ALU reg-reg operation
6469 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6470 %{
6471 single_instruction;
6472 cr : S4(write);
6473 src1 : S3(read);
6474 src2 : S3(read);
6475 DECODE : S0; // any decoder
6476 ALU : S3; // any alu
6477 %}
6478
6479 // Integer ALU reg-imm operation
6480 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6481 %{
6482 single_instruction;
6483 cr : S4(write);
6484 src1 : S3(read);
6485 DECODE : S0; // any decoder
6486 ALU : S3; // any alu
6487 %}
6488
6489 // Integer ALU reg-mem operation
6490 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6491 %{
6492 single_instruction;
6493 cr : S4(write);
6494 src1 : S3(read);
6495 src2 : S3(read);
6496 D0 : S0; // big decoder only
6497 ALU : S4; // any alu
6498 MEM : S3;
6499 %}
6500
6501 // Conditional move reg-reg
6502 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6503 %{
6504 instruction_count(4);
6505 y : S4(read);
6506 q : S3(read);
6507 p : S3(read);
6508 DECODE : S0(4); // any decoder
6509 %}
6510
6511 // Conditional move reg-reg
6512 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6513 %{
6514 single_instruction;
6515 dst : S4(write);
6516 src : S3(read);
6517 cr : S3(read);
6518 DECODE : S0; // any decoder
6519 %}
6520
6521 // Conditional move reg-mem
6522 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6523 %{
6524 single_instruction;
6525 dst : S4(write);
6526 src : S3(read);
6527 cr : S3(read);
6528 DECODE : S0; // any decoder
6529 MEM : S3;
6530 %}
6531
6532 // Conditional move reg-reg long
6533 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6534 %{
6535 single_instruction;
6536 dst : S4(write);
6537 src : S3(read);
6538 cr : S3(read);
6539 DECODE : S0(2); // any 2 decoders
6540 %}
6541
6542 // Float reg-reg operation
6543 pipe_class fpu_reg(regD dst)
6544 %{
6545 instruction_count(2);
6546 dst : S3(read);
6547 DECODE : S0(2); // any 2 decoders
6548 FPU : S3;
6549 %}
6550
6551 // Float reg-reg operation
6552 pipe_class fpu_reg_reg(regD dst, regD src)
6553 %{
6554 instruction_count(2);
6555 dst : S4(write);
6556 src : S3(read);
6557 DECODE : S0(2); // any 2 decoders
6558 FPU : S3;
6559 %}
6560
6561 // Float reg-reg operation
6562 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6563 %{
6564 instruction_count(3);
6565 dst : S4(write);
6566 src1 : S3(read);
6567 src2 : S3(read);
6568 DECODE : S0(3); // any 3 decoders
6569 FPU : S3(2);
6570 %}
6571
// Float reg-reg operation (four register operands)
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
  FPU : S3(2);
%}
6583
6584 // Float reg-reg operation
6585 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6586 %{
6587 instruction_count(4);
6588 dst : S4(write);
6589 src1 : S3(read);
6590 src2 : S3(read);
6591 src3 : S3(read);
6592 DECODE : S1(3); // any 3 decoders
6593 D0 : S0; // Big decoder only
6594 FPU : S3(2);
6595 MEM : S3;
6596 %}
6597
6598 // Float reg-mem operation
6599 pipe_class fpu_reg_mem(regD dst, memory mem)
6600 %{
6601 instruction_count(2);
6602 dst : S5(write);
6603 mem : S3(read);
6604 D0 : S0; // big decoder only
6605 DECODE : S1; // any decoder for FPU POP
6606 FPU : S4;
6607 MEM : S3; // any mem
6608 %}
6609
6610 // Float reg-mem operation
6611 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6612 %{
6613 instruction_count(3);
6614 dst : S5(write);
6615 src1 : S3(read);
6616 mem : S3(read);
6617 D0 : S0; // big decoder only
6618 DECODE : S1(2); // any decoder for FPU POP
6619 FPU : S4;
6620 MEM : S3; // any mem
6621 %}
6622
6623 // Float mem-reg operation
6624 pipe_class fpu_mem_reg(memory mem, regD src)
6625 %{
6626 instruction_count(2);
6627 src : S5(read);
6628 mem : S3(read);
6629 DECODE : S0; // any decoder for FPU PUSH
6630 D0 : S1; // big decoder only
6631 FPU : S4;
6632 MEM : S3; // any mem
6633 %}
6634
6635 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6636 %{
6637 instruction_count(3);
6638 src1 : S3(read);
6639 src2 : S3(read);
6640 mem : S3(read);
6641 DECODE : S0(2); // any decoder for FPU PUSH
6642 D0 : S1; // big decoder only
6643 FPU : S4;
6644 MEM : S3; // any mem
6645 %}
6646
6647 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6648 %{
6649 instruction_count(3);
6650 src1 : S3(read);
6651 src2 : S3(read);
6652 mem : S4(read);
6653 DECODE : S0; // any decoder for FPU PUSH
6654 D0 : S0(2); // big decoder only
6655 FPU : S4;
6656 MEM : S3(2); // any mem
6657 %}
6658
6659 pipe_class fpu_mem_mem(memory dst, memory src1)
6660 %{
6661 instruction_count(2);
6662 src1 : S3(read);
6663 dst : S4(read);
6664 D0 : S0(2); // big decoder only
6665 MEM : S3(2); // any mem
6666 %}
6667
6668 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6669 %{
6670 instruction_count(3);
6671 src1 : S3(read);
6672 src2 : S3(read);
6673 dst : S4(read);
6674 D0 : S0(3); // big decoder only
6675 FPU : S4;
6676 MEM : S3(3); // any mem
6677 %}
6678
6679 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6680 %{
6681 instruction_count(3);
6682 src1 : S4(read);
6683 mem : S4(read);
6684 DECODE : S0; // any decoder for FPU PUSH
6685 D0 : S0(2); // big decoder only
6686 FPU : S4;
6687 MEM : S3(2); // any mem
6688 %}
6689
6690 // Float load constant
6691 pipe_class fpu_reg_con(regD dst)
6692 %{
6693 instruction_count(2);
6694 dst : S5(write);
6695 D0 : S0; // big decoder only for the load
6696 DECODE : S1; // any decoder for FPU POP
6697 FPU : S4;
6698 MEM : S3; // any mem
6699 %}
6700
6701 // Float load constant
6702 pipe_class fpu_reg_reg_con(regD dst, regD src)
6703 %{
6704 instruction_count(3);
6705 dst : S5(write);
6706 src : S3(read);
6707 D0 : S0; // big decoder only for the load
6708 DECODE : S1(2); // any decoder for FPU POP
6709 FPU : S4;
6710 MEM : S3; // any mem
6711 %}
6712
6713 // UnConditional branch
6714 pipe_class pipe_jmp(label labl)
6715 %{
6716 single_instruction;
6717 BR : S3;
6718 %}
6719
6720 // Conditional branch
6721 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6722 %{
6723 single_instruction;
6724 cr : S1(read);
6725 BR : S3;
6726 %}
6727
// Allocation idiom
// Models a compare-and-exchange style sequence: a single serialized
// bundle with a fixed 6-cycle latency that occupies a memory port,
// both ALUs and the branch unit.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE : S0(3);
  D0 : S2;
  MEM : S3;
  ALU : S3(2);
  dst : S5(write);
  BR : S5;
%}
6741
// Generic big/slow expanded idiom
// Catch-all class for expensive multi-instruction expansions; the large
// fixed latency keeps the scheduler from packing anything alongside it.
pipe_class pipe_slow()
%{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0 : S0(2);
  MEM : S3(2);
%}
6750
6751 // The real do-nothing guy
6752 pipe_class empty()
6753 %{
6754 instruction_count(0);
6755 %}
6756
6757 // Define the class for the Nop node
6758 define
6759 %{
6760 MachNop = empty;
6761 %}
6762
6763 %}
6764
6765 //----------INSTRUCTIONS-------------------------------------------------------
6766 //
6767 // match -- States which machine-independent subtree may be replaced
6768 // by this instruction.
6769 // ins_cost -- The estimated cost of this instruction is used by instruction
6770 // selection to identify a minimum cost tree of machine
6771 // instructions that matches a tree of machine-independent
6772 // instructions.
6773 // format -- A string providing the disassembly for this instruction.
6774 // The value of an instruction's operand may be inserted
6775 // by referring to it with a '$' prefix.
6776 // opcode -- Three instruction opcodes may be provided. These are referred
6777 // to within an encode class as $primary, $secondary, and $tertiary
// respectively. The primary opcode is commonly used to
6779 // indicate the type of machine instruction, while secondary
6780 // and tertiary are often used for prefix options or addressing
6781 // modes.
6782 // ins_encode -- A list of encode classes with parameters. The encode class
6783 // name must have been defined in an 'enc_class' specification
6784 // in the encode section of the architecture description.
6785
6786 // ============================================================================
6787
// Matches the Halt node. If the halt is reachable, emit a stop with the
// textual halt reason so a debug VM reports why this supposedly
// unreachable point was executed; unreachable halts emit nothing.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      const char* str = __ code_string(_halt_reason);
      __ stop(str);
    }
  %}
  ins_pipe(pipe_slow);
%}
6799
6800 // ============================================================================
6801
6802 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6803 // Load Float
6804 instruct MoveF2VL(vlRegF dst, regF src) %{
6805 match(Set dst src);
6806 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6807 ins_encode %{
6808 ShouldNotReachHere();
6809 %}
6810 ins_pipe( fpu_reg_reg );
6811 %}
6812
6813 // Load Float
6814 instruct MoveF2LEG(legRegF dst, regF src) %{
6815 match(Set dst src);
6816 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6817 ins_encode %{
6818 ShouldNotReachHere();
6819 %}
6820 ins_pipe( fpu_reg_reg );
6821 %}
6822
6823 // Load Float
6824 instruct MoveVL2F(regF dst, vlRegF src) %{
6825 match(Set dst src);
6826 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6827 ins_encode %{
6828 ShouldNotReachHere();
6829 %}
6830 ins_pipe( fpu_reg_reg );
6831 %}
6832
6833 // Load Float
6834 instruct MoveLEG2F(regF dst, legRegF src) %{
6835 match(Set dst src);
6836 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6837 ins_encode %{
6838 ShouldNotReachHere();
6839 %}
6840 ins_pipe( fpu_reg_reg );
6841 %}
6842
6843 // Load Double
6844 instruct MoveD2VL(vlRegD dst, regD src) %{
6845 match(Set dst src);
6846 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6847 ins_encode %{
6848 ShouldNotReachHere();
6849 %}
6850 ins_pipe( fpu_reg_reg );
6851 %}
6852
6853 // Load Double
6854 instruct MoveD2LEG(legRegD dst, regD src) %{
6855 match(Set dst src);
6856 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6857 ins_encode %{
6858 ShouldNotReachHere();
6859 %}
6860 ins_pipe( fpu_reg_reg );
6861 %}
6862
6863 // Load Double
6864 instruct MoveVL2D(regD dst, vlRegD src) %{
6865 match(Set dst src);
6866 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6867 ins_encode %{
6868 ShouldNotReachHere();
6869 %}
6870 ins_pipe( fpu_reg_reg );
6871 %}
6872
6873 // Load Double
6874 instruct MoveLEG2D(regD dst, legRegD src) %{
6875 match(Set dst src);
6876 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6877 ins_encode %{
6878 ShouldNotReachHere();
6879 %}
6880 ins_pipe( fpu_reg_reg );
6881 %}
6882
6883 //----------Load/Store/Move Instructions---------------------------------------
6884 //----------Load Instructions--------------------------------------------------
6885
6886 // Load Byte (8 bit signed)
6887 instruct loadB(rRegI dst, memory mem)
6888 %{
6889 match(Set dst (LoadB mem));
6890
6891 ins_cost(125);
6892 format %{ "movsbl $dst, $mem\t# byte" %}
6893
6894 ins_encode %{
6895 __ movsbl($dst$$Register, $mem$$Address);
6896 %}
6897
6898 ins_pipe(ialu_reg_mem);
6899 %}
6900
6901 // Load Byte (8 bit signed) into Long Register
6902 instruct loadB2L(rRegL dst, memory mem)
6903 %{
6904 match(Set dst (ConvI2L (LoadB mem)));
6905
6906 ins_cost(125);
6907 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6908
6909 ins_encode %{
6910 __ movsbq($dst$$Register, $mem$$Address);
6911 %}
6912
6913 ins_pipe(ialu_reg_mem);
6914 %}
6915
6916 // Load Unsigned Byte (8 bit UNsigned)
6917 instruct loadUB(rRegI dst, memory mem)
6918 %{
6919 match(Set dst (LoadUB mem));
6920
6921 ins_cost(125);
6922 format %{ "movzbl $dst, $mem\t# ubyte" %}
6923
6924 ins_encode %{
6925 __ movzbl($dst$$Register, $mem$$Address);
6926 %}
6927
6928 ins_pipe(ialu_reg_mem);
6929 %}
6930
6931 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6932 instruct loadUB2L(rRegL dst, memory mem)
6933 %{
6934 match(Set dst (ConvI2L (LoadUB mem)));
6935
6936 ins_cost(125);
6937 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6938
6939 ins_encode %{
6940 __ movzbq($dst$$Register, $mem$$Address);
6941 %}
6942
6943 ins_pipe(ialu_reg_mem);
6944 %}
6945
// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr); // the andl below clobbers the flags

  format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbq(Rdst, $mem$$Address);
    // After the zero-extending byte load only the low 8 bits of the
    // mask can have any effect, so pre-narrow the constant.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
6960
6961 // Load Short (16 bit signed)
6962 instruct loadS(rRegI dst, memory mem)
6963 %{
6964 match(Set dst (LoadS mem));
6965
6966 ins_cost(125);
6967 format %{ "movswl $dst, $mem\t# short" %}
6968
6969 ins_encode %{
6970 __ movswl($dst$$Register, $mem$$Address);
6971 %}
6972
6973 ins_pipe(ialu_reg_mem);
6974 %}
6975
6976 // Load Short (16 bit signed) to Byte (8 bit signed)
6977 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6978 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6979
6980 ins_cost(125);
6981 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6982 ins_encode %{
6983 __ movsbl($dst$$Register, $mem$$Address);
6984 %}
6985 ins_pipe(ialu_reg_mem);
6986 %}
6987
6988 // Load Short (16 bit signed) into Long Register
6989 instruct loadS2L(rRegL dst, memory mem)
6990 %{
6991 match(Set dst (ConvI2L (LoadS mem)));
6992
6993 ins_cost(125);
6994 format %{ "movswq $dst, $mem\t# short -> long" %}
6995
6996 ins_encode %{
6997 __ movswq($dst$$Register, $mem$$Address);
6998 %}
6999
7000 ins_pipe(ialu_reg_mem);
7001 %}
7002
7003 // Load Unsigned Short/Char (16 bit UNsigned)
7004 instruct loadUS(rRegI dst, memory mem)
7005 %{
7006 match(Set dst (LoadUS mem));
7007
7008 ins_cost(125);
7009 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7010
7011 ins_encode %{
7012 __ movzwl($dst$$Register, $mem$$Address);
7013 %}
7014
7015 ins_pipe(ialu_reg_mem);
7016 %}
7017
7018 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7019 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7020 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7021
7022 ins_cost(125);
7023 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7024 ins_encode %{
7025 __ movsbl($dst$$Register, $mem$$Address);
7026 %}
7027 ins_pipe(ialu_reg_mem);
7028 %}
7029
7030 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7031 instruct loadUS2L(rRegL dst, memory mem)
7032 %{
7033 match(Set dst (ConvI2L (LoadUS mem)));
7034
7035 ins_cost(125);
7036 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7037
7038 ins_encode %{
7039 __ movzwq($dst$$Register, $mem$$Address);
7040 %}
7041
7042 ins_pipe(ialu_reg_mem);
7043 %}
7044
7045 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7046 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7047 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7048
7049 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7050 ins_encode %{
7051 __ movzbq($dst$$Register, $mem$$Address);
7052 %}
7053 ins_pipe(ialu_reg_mem);
7054 %}
7055
7056 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7057 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7058 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7059 effect(KILL cr);
7060
7061 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7062 "andl $dst, right_n_bits($mask, 16)" %}
7063 ins_encode %{
7064 Register Rdst = $dst$$Register;
7065 __ movzwq(Rdst, $mem$$Address);
7066 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7067 %}
7068 ins_pipe(ialu_reg_mem);
7069 %}
7070
7071 // Load Integer
7072 instruct loadI(rRegI dst, memory mem)
7073 %{
7074 match(Set dst (LoadI mem));
7075
7076 ins_cost(125);
7077 format %{ "movl $dst, $mem\t# int" %}
7078
7079 ins_encode %{
7080 __ movl($dst$$Register, $mem$$Address);
7081 %}
7082
7083 ins_pipe(ialu_reg_mem);
7084 %}
7085
7086 // Load Integer (32 bit signed) to Byte (8 bit signed)
7087 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7088 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7089
7090 ins_cost(125);
7091 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7092 ins_encode %{
7093 __ movsbl($dst$$Register, $mem$$Address);
7094 %}
7095 ins_pipe(ialu_reg_mem);
7096 %}
7097
7098 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7099 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7100 match(Set dst (AndI (LoadI mem) mask));
7101
7102 ins_cost(125);
7103 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7104 ins_encode %{
7105 __ movzbl($dst$$Register, $mem$$Address);
7106 %}
7107 ins_pipe(ialu_reg_mem);
7108 %}
7109
7110 // Load Integer (32 bit signed) to Short (16 bit signed)
7111 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7112 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7113
7114 ins_cost(125);
7115 format %{ "movswl $dst, $mem\t# int -> short" %}
7116 ins_encode %{
7117 __ movswl($dst$$Register, $mem$$Address);
7118 %}
7119 ins_pipe(ialu_reg_mem);
7120 %}
7121
7122 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7123 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7124 match(Set dst (AndI (LoadI mem) mask));
7125
7126 ins_cost(125);
7127 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7128 ins_encode %{
7129 __ movzwl($dst$$Register, $mem$$Address);
7130 %}
7131 ins_pipe(ialu_reg_mem);
7132 %}
7133
7134 // Load Integer into Long Register
7135 instruct loadI2L(rRegL dst, memory mem)
7136 %{
7137 match(Set dst (ConvI2L (LoadI mem)));
7138
7139 ins_cost(125);
7140 format %{ "movslq $dst, $mem\t# int -> long" %}
7141
7142 ins_encode %{
7143 __ movslq($dst$$Register, $mem$$Address);
7144 %}
7145
7146 ins_pipe(ialu_reg_mem);
7147 %}
7148
7149 // Load Integer with mask 0xFF into Long Register
7150 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7151 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7152
7153 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7154 ins_encode %{
7155 __ movzbq($dst$$Register, $mem$$Address);
7156 %}
7157 ins_pipe(ialu_reg_mem);
7158 %}
7159
7160 // Load Integer with mask 0xFFFF into Long Register
7161 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7162 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7163
7164 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7165 ins_encode %{
7166 __ movzwq($dst$$Register, $mem$$Address);
7167 %}
7168 ins_pipe(ialu_reg_mem);
7169 %}
7170
// Load Integer with a 31-bit mask into Long Register
// A 31-bit (non-negative) mask guarantees the masked int is non-negative,
// so the implicit zero-extension of the 32-bit andl result is a correct
// int->long conversion without a separate sign/zero extend.
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr); // andl clobbers the flags

  format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl $dst, $mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
7185
7186 // Load Unsigned Integer into Long Register
7187 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7188 %{
7189 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7190
7191 ins_cost(125);
7192 format %{ "movl $dst, $mem\t# uint -> long" %}
7193
7194 ins_encode %{
7195 __ movl($dst$$Register, $mem$$Address);
7196 %}
7197
7198 ins_pipe(ialu_reg_mem);
7199 %}
7200
7201 // Load Long
7202 instruct loadL(rRegL dst, memory mem)
7203 %{
7204 match(Set dst (LoadL mem));
7205
7206 ins_cost(125);
7207 format %{ "movq $dst, $mem\t# long" %}
7208
7209 ins_encode %{
7210 __ movq($dst$$Register, $mem$$Address);
7211 %}
7212
7213 ins_pipe(ialu_reg_mem); // XXX
7214 %}
7215
7216 // Load Range
7217 instruct loadRange(rRegI dst, memory mem)
7218 %{
7219 match(Set dst (LoadRange mem));
7220
7221 ins_cost(125); // XXX
7222 format %{ "movl $dst, $mem\t# range" %}
7223 ins_encode %{
7224 __ movl($dst$$Register, $mem$$Address);
7225 %}
7226 ins_pipe(ialu_reg_mem);
7227 %}
7228
7229 // Load Pointer
7230 instruct loadP(rRegP dst, memory mem)
7231 %{
7232 match(Set dst (LoadP mem));
7233 predicate(n->as_Load()->barrier_data() == 0);
7234
7235 ins_cost(125); // XXX
7236 format %{ "movq $dst, $mem\t# ptr" %}
7237 ins_encode %{
7238 __ movq($dst$$Register, $mem$$Address);
7239 %}
7240 ins_pipe(ialu_reg_mem); // XXX
7241 %}
7242
7243 // Load Compressed Pointer
7244 instruct loadN(rRegN dst, memory mem)
7245 %{
7246 predicate(n->as_Load()->barrier_data() == 0);
7247 match(Set dst (LoadN mem));
7248
7249 ins_cost(125); // XXX
7250 format %{ "movl $dst, $mem\t# compressed ptr" %}
7251 ins_encode %{
7252 __ movl($dst$$Register, $mem$$Address);
7253 %}
7254 ins_pipe(ialu_reg_mem); // XXX
7255 %}
7256
7257
7258 // Load Klass Pointer
7259 instruct loadKlass(rRegP dst, memory mem)
7260 %{
7261 match(Set dst (LoadKlass mem));
7262
7263 ins_cost(125); // XXX
7264 format %{ "movq $dst, $mem\t# class" %}
7265 ins_encode %{
7266 __ movq($dst$$Register, $mem$$Address);
7267 %}
7268 ins_pipe(ialu_reg_mem); // XXX
7269 %}
7270
7271 // Load narrow Klass Pointer
7272 instruct loadNKlass(rRegN dst, memory mem)
7273 %{
7274 predicate(!UseCompactObjectHeaders);
7275 match(Set dst (LoadNKlass mem));
7276
7277 ins_cost(125); // XXX
7278 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7279 ins_encode %{
7280 __ movl($dst$$Register, $mem$$Address);
7281 %}
7282 ins_pipe(ialu_reg_mem); // XXX
7283 %}
7284
// Load a narrow klass pointer out of a compact object header: the
// compressed klass bits sit above markWord::klass_shift_at_offset, so
// the loaded word must be shifted down after the load.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
%{
  predicate(UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));
  effect(KILL cr);
  ins_cost(125);
  format %{
    "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl $dst, markWord::klass_shift_at_offset"
  %}
  ins_encode %{
    // With APX, a single eshrl fuses the load and the shift.
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
  %}
  ins_pipe(ialu_reg_mem);
%}
7306
7307 // Load Float
7308 instruct loadF(regF dst, memory mem)
7309 %{
7310 match(Set dst (LoadF mem));
7311
7312 ins_cost(145); // XXX
7313 format %{ "movss $dst, $mem\t# float" %}
7314 ins_encode %{
7315 __ movflt($dst$$XMMRegister, $mem$$Address);
7316 %}
7317 ins_pipe(pipe_slow); // XXX
7318 %}
7319
7320 // Load Double
7321 instruct loadD_partial(regD dst, memory mem)
7322 %{
7323 predicate(!UseXmmLoadAndClearUpper);
7324 match(Set dst (LoadD mem));
7325
7326 ins_cost(145); // XXX
7327 format %{ "movlpd $dst, $mem\t# double" %}
7328 ins_encode %{
7329 __ movdbl($dst$$XMMRegister, $mem$$Address);
7330 %}
7331 ins_pipe(pipe_slow); // XXX
7332 %}
7333
7334 instruct loadD(regD dst, memory mem)
7335 %{
7336 predicate(UseXmmLoadAndClearUpper);
7337 match(Set dst (LoadD mem));
7338
7339 ins_cost(145); // XXX
7340 format %{ "movsd $dst, $mem\t# double" %}
7341 ins_encode %{
7342 __ movdbl($dst$$XMMRegister, $mem$$Address);
7343 %}
7344 ins_pipe(pipe_slow); // XXX
7345 %}
7346
7347 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7348 %{
7349 match(Set dst con);
7350
7351 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7352
7353 ins_encode %{
7354 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7355 %}
7356
7357 ins_pipe(ialu_reg_fat);
7358 %}
7359
7360 // max = java.lang.Math.max(float a, float b)
7361 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7362 predicate(VM_Version::supports_avx10_2());
7363 match(Set dst (MaxF a b));
7364 format %{ "maxF $dst, $a, $b" %}
7365 ins_encode %{
7366 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7367 %}
7368 ins_pipe( pipe_slow );
7369 %}
7370
7371 // max = java.lang.Math.max(float a, float b)
7372 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7373 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7374 match(Set dst (MaxF a b));
7375 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7376 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7377 ins_encode %{
7378 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7379 %}
7380 ins_pipe( pipe_slow );
7381 %}
7382
7383 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7384 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7385 match(Set dst (MaxF a b));
7386 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7387
7388 format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7389 ins_encode %{
7390 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7391 false /*min*/, true /*single*/);
7392 %}
7393 ins_pipe( pipe_slow );
7394 %}
7395
7396 // max = java.lang.Math.max(double a, double b)
7397 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7398 predicate(VM_Version::supports_avx10_2());
7399 match(Set dst (MaxD a b));
7400 format %{ "maxD $dst, $a, $b" %}
7401 ins_encode %{
7402 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7403 %}
7404 ins_pipe( pipe_slow );
7405 %}
7406
7407 // max = java.lang.Math.max(double a, double b)
7408 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7409 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7410 match(Set dst (MaxD a b));
7411 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7412 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7413 ins_encode %{
7414 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7415 %}
7416 ins_pipe( pipe_slow );
7417 %}
7418
7419 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7420 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7421 match(Set dst (MaxD a b));
7422 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7423
7424 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7425 ins_encode %{
7426 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7427 false /*min*/, false /*single*/);
7428 %}
7429 ins_pipe( pipe_slow );
7430 %}
7431
// min = java.lang.Math.min(float a, float b)
instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinF a b));
  format %{ "minF $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
7442
7443 // min = java.lang.Math.min(float a, float b)
7444 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7445 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7446 match(Set dst (MinF a b));
7447 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7448 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7449 ins_encode %{
7450 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7451 %}
7452 ins_pipe( pipe_slow );
7453 %}
7454
7455 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7456 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7457 match(Set dst (MinF a b));
7458 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7459
7460 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7461 ins_encode %{
7462 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7463 true /*min*/, true /*single*/);
7464 %}
7465 ins_pipe( pipe_slow );
7466 %}
7467
// min = java.lang.Math.min(double a, double b)
instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinD a b));
  format %{ "minD $dst, $a, $b" %}
  ins_encode %{
    __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
  %}
  ins_pipe( pipe_slow );
%}
7478
7479 // min = java.lang.Math.min(double a, double b)
7480 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7481 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7482 match(Set dst (MinD a b));
7483 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7484 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7485 ins_encode %{
7486 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7487 %}
7488 ins_pipe( pipe_slow );
7489 %}
7490
// Reduction form of java.lang.Math.min(double a, double b).
// The encode call passes min=true, single=false (double precision).
instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  // Fixed: the disassembly label previously said "maxD_reduction" even
  // though this instruct matches MinD and emits a min.
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
                    true /*min*/, false /*single*/);
  %}
  ins_pipe( pipe_slow );
%}
7503
// Load Effective Address
// The leaP* family folds an address-arithmetic expression (base + index *
// scale + displacement, in its various operand forms) into a single leaq.
// Each variant differs only in the memory operand shape it matches.
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr 8" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// Base + 32-bit displacement.
instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr 32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// Base + index + displacement.
instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// Base + scaled index.
instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7552
// Base + positive scaled index (index known non-negative).
// Fix: the format tag previously read "ptr idxscale", identical to
// leaPIdxScale's; renamed to "posidxscale" to match the convention of the
// other positive-index variants (posidxoff, posidxscaleoff) so the two
// rules are distinguishable in PrintOptoAssembly output.
instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7564
// Base + scaled index + displacement.
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// Base + positive index + displacement.
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// Base + positive scaled index + displacement.
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// Load Effective Address which uses Narrow (32-bits) oop
// Decode-and-add in one leaq when compressed oops are shifted.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// The *Narrow variants below apply only when compressed oops need no shift
// (CompressedOops::shift() == 0), so the narrow base can be used directly.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7705
// Load int constant into register.
instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl $dst, $src\t# int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Load int zero: xor is shorter than mov-immediate but clobbers flags,
// hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Load arbitrary 64-bit long constant (full 10-byte mov64 encoding).
instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq $dst, $src\t# long" %}
  ins_encode %{
    __ mov64($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Load long zero. A 32-bit xor suffices: writing the low 32 bits
// zero-extends into the full 64-bit register.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# long" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg); // XXX
%}

// Long constant that fits in unsigned 32 bits: movl zero-extends, so the
// shorter 32-bit encoding produces the correct 64-bit value.
instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Long constant that fits in signed 32 bits: movq sign-extends the imm32.
instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Load pointer constant; records relocation info so the GC/patching
// machinery can track the embedded address.
instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq $dst, $con\t# ptr" %}
  ins_encode %{
    __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Load null pointer via xor (no relocation needed for null).
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# ptr" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Pointer constant in the positive 32-bit range: movl zero-extends.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Load float constant from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Load half-float constant from the constant table (same movss path).
instruct loadConH(regF dst, immH con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7834
// Load compressed-null constant via self-xor.
// Fix: the format string previously printed "xorq $dst, $src" while the
// encoding is xorq(dst, dst); the disassembly text now matches the emitted
// instruction.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7844
// Load non-null compressed oop constant; set_narrow_oop records the oop
// relocation. Null is matched by loadConN0 above, so reaching here with a
// null constant is a matcher bug.
instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Load compressed klass pointer constant (with klass relocation).
instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

// Float +0.0: self-xor avoids a constant-table load.
instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Use the same format since predicate() can not be used here.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Double +0.0: self-xor avoids a constant-table load.
instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd $dst, $dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// loadSS*: reload spilled values from stack slots into registers.
instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// FP stack-slot reloads address the slot explicitly off rsp.
instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
7972
// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// One instruct per AllocatePrefetchInstr setting (0=NTA, 1=T0, 2=T2,
// 3=PREFETCHW); the predicates are mutually exclusive.

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}
8023
//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw $mem, $src\t# char/short" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Integer
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Long
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}

// Store Pointer
// barrier_data() == 0 restricts this rule to stores needing no GC barrier;
// barrier-carrying stores are matched by GC-specific rules elsewhere.
instruct storeP(memory mem, any_RegP src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store null pointer by storing R12: when the compressed-oops base is null,
// R12 (the heap base register) always holds zero, which is cheaper to
// encode than a 64-bit immediate store.
instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Null Pointer, mark word, or other simple pointer constant.
instruct storeImmP(memory mem, immP31 src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Compressed Pointer
instruct storeN(memory mem, rRegN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store compressed klass pointer (no GC barrier applies to klass fields).
instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store compressed null via R12 (zero when the heap base is null).
instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store compressed oop constant; non-null constants need oop relocation.
instruct storeImmN(memory mem, immN src)
%{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == nullptr) {
      __ movl($mem$$Address, 0);
    } else {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store compressed klass constant (with klass relocation).
instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Integer Immediate
// Zero stores reuse R12 (known zero when the heap base is null) -- a
// reg store encodes shorter than an immediate store.
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Long Immediate
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Long immediate limited to signed 32 bits (movq sign-extends the imm32).
instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Short/Char Immediate
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Guarded by UseStoreImmI16: 16-bit immediate stores incur a
// length-changing-prefix stall on some CPUs.
instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8266
// Store Byte Immediate
// Zero byte stored via R12 (known zero when the compressed-oops base is
// null). Fix: the format comment previously said "short/char" -- a
// copy-paste from storeImmC0 -- although this rule stores a byte (movb).
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8280
// Store byte immediate.
instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Float
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate Float value (it is faster than store from XMM register)
// Float 0.0 stored as the integer register R12 (zero when heap base is null).
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Any float immediate: store its raw 32-bit pattern with an integer movl.
instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Double
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate double 0.0 (it is faster than store from XMM register)
// Fallback for when the R12-as-zero trick (storeD0 below) is unavailable.
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// storeSS*: spill register values into stack slots.
instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    __ movl($dst$$Address, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}

instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    __ movq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// FP spills address the slot explicitly off rsp.
instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8431
// Cache-line writeback (CLWB/CLFLUSH family via cache_wb), used for
// persistent-memory support. Requires hardware flush capability.
instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    // The operand must be a plain base register: no index, no displacement.
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering fence emitted before a writeback sequence.
instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Ordering fence emitted after a writeback sequence.
instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8472
//----------BSWAP Instructions-------------------------------------------------
// Byte-order reversal; all variants operate in place on dst.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl $dst" %}
  ins_encode %{
    __ bswapl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq $dst" %}
  ins_encode %{
    __ bswapq($dst$$Register);
  %}
  ins_pipe( ialu_reg);
%}

// Unsigned short: bswap the 32-bit register, then shift the two
// meaningful bytes back down with a logical (zero-extending) shift.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "shrl $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Signed short: same trick, but an arithmetic shift preserves the sign.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "sar $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
8519
//---------- Zeros Count Instructions ------------------------------------------
// Leading/trailing zero counts. Fast forms use LZCNT/TZCNT when the CPU
// supports them; *_bsr / *_bsf forms emulate the semantics with BSR/BSF,
// which leave the destination undefined on zero input and require an
// explicit fixup for that case.

instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BSR fallback: bsr yields the highest set bit index; leading zeros are
// 31 - index. Zero input leaves bsr's dst undefined (ZF set), so dst is
// forced to -1 first, making the final result 31 - (-1) = 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BSR fallback; same scheme as the int version with 63 - index.
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrq(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BSF fallback: bsf already yields the trailing-zero count for non-zero
// input; only the zero case (undefined dst, ZF set) needs the fixup to 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BSF fallback; zero input fixes up to 64.
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz done\n\t"
            "movl $dst, 64\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfq(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerLong);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
8707
//--------------- Reverse Operation Instructions ----------------
// Bit-order reversal. With GFNI available the helpers can use XMM-based
// byte-wise bit reversal (hence the extra XMM temps); without it a pure
// GPR sequence is used (xnoreg passed for the XMM temps).
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Long variant without GFNI needs a second GPR temp.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // GFNI path needs only one GPR temp; noreg for the unused second temp.
    __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
  %}
  ins_pipe( ialu_reg );
%}
8752
//---------- Population Count Instructions -------------------------------------
// POPCNT-based bit counting; all variants require UsePopCountInstruction and
// clobber the flags.

// Bit count of an int in a register.
instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Bit count of an int, folding the load into the popcnt memory operand.
instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8804
8805
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors.  On x86 only StoreLoad reordering is possible, so
// acquire/release/storestore barriers emit no code (size(0), empty encoding);
// the StoreLoad-class barriers emit a locked read-modify-write of the top of
// the stack, which serializes memory.

// Acquire barrier: free on x86 — loads are not reordered with later memory ops.
instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Acquire after a lock acquisition: the CMPXCHG in FastLock already serializes.
instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Release barrier: free on x86 — stores are not reordered with earlier memory ops.
instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Release before a lock release: the following FastUnlock serializes.
instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// StoreLoad barrier: the one barrier x86 actually needs an instruction for.
// Implemented as "lock addl [rsp], 0" inside MacroAssembler::membar.
instruct membar_storeload(rFlagsReg cr) %{
  match(MemBarStoreLoad);
  effect(KILL cr); // the locked addl clobbers the flags
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Volatile-store barrier: same StoreLoad fence as above.
instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Volatile barrier elided when the matcher proves a serializing instruction
// (e.g. a following locked op) already provides the StoreLoad ordering.
instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Full fence: on x86 the StoreLoad fence is sufficient for a full barrier.
instruct membar_full(rFlagsReg cr) %{
  match(MemBarFull);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// StoreStore barrier: free on x86 — stores are not reordered with other stores.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
8922
//----------Move Instructions--------------------------------------------------

// Reinterpret a long as a pointer.  A register-allocator no-op when src and
// dst land in the same register; otherwise a plain 64-bit move.
instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Reinterpret a pointer as a long; mirror of castX2P.
instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop into int for vectors alignment masking.
// Only the low 32 bits matter, so a movl (which also zero-extends) suffices.
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With shift == 0 the narrow oop already equals the low 32 address bits.
instruct convN2I(rRegI dst, rRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop pointer into compressed form.
// This variant is used when the oop may be null (not provably NotNull), so
// the null-preserving encode_heap_oop() sequence is required.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s); // encode_heap_oop works in place; copy first if needed
    }
    __ encode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

// Encode an oop known to be non-null: cheaper sequence, no null check.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

// Decode a compressed oop that may be null (and is not a constant):
// null-preserving decode_heap_oop() sequence, done in place after a copy.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ decode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

// Decode a compressed oop known non-null (or constant): the macro assembler
// has a two-register form, so no explicit copy is needed when src != dst.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ decode_heap_oop_not_null(d, s);
    } else {
      __ decode_heap_oop_not_null(d);
    }
  %}
  ins_pipe(ialu_reg_long);
%}

// Compress a (never-null) klass pointer.  TEMP dst keeps dst and src apart.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

// Decompress a (never-null) narrow klass pointer; mirror of the encode above.
instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}
9059
//----------Conditional Move---------------------------------------------------
// Jump tables (switch dispatch) — these jumpXtnd rules live here, ahead of
// the CMove rules.  Each loads the jump-table base from the constant section
// into $dest and performs an indirect jump indexed by the switch value.
// dummy instruction for generating temp registers
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  predicate(false); // never selected by matching directly
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Table jump with a scaled index plus a constant byte offset into the table.
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Table jump with an unscaled (byte) index.
instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, Address::times_1);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}
9123
// Conditional move (int, register forms).
// Naming: plain = signed flags; U = unsigned flags; UCF/UCFE/UCF2 = flag
// register variants produced by unsigned/float compares; _ndd = APX
// "new data destination" three-operand CMOV (dst need not equal an input).
//
// The imm_01 variants handle CMove(cond, 1, 0): the predicate requires the
// other CMove input (which is also dst) to be the constant 0, so the whole
// thing reduces to setb of the negated condition.
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Two-operand cmov: dst = cond ? dst : src (non-APX path).
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX NDD three-operand cmov: dst gets one of src1/src2, sources preserved.
instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// 1/0 CMove on unsigned flags; same setb trick as cmovI_imm_01.
instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Unsigned-flags cmov, two-operand form.
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Unsigned-flags cmov, APX NDD form.
instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// 1/0 CMove on UCF (carry-only unsigned) flags.
instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// 1/0 CMove on UCFE flags.
instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// UCF cmov delegates to the plain unsigned form via ADL expansion.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// UCFE cmov, APX NDD form.
instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// ne-CMove on UCF2 flags (float compare): move on parity (unordered) OR
// not-equal, since unordered must count as "not equal".
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  effect(TEMP dst); // dst must survive both conditional moves

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9281
// Conditional move (int) with the true-side value loaded from memory.
// Signed-flags, two-operand form.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move — APX NDD register/memory form (signed flags).
instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move — unsigned flags, memory form.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// UCF memory cmov delegates to the unsigned memory form via expansion.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// APX NDD register/memory cmov, unsigned flags.
instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// APX NDD register/memory cmov, UCFE flags.
instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9356
// Conditional move of compressed (narrow) oops.  Narrow oops are 32-bit, so
// all forms use the 32-bit cmovl/ecmovl; structure parallels the cmovI rules.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move ndd (APX three-operand form).
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move — unsigned flags.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCF form delegates to the unsigned form via expansion.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}

// Conditional move ndd — unsigned flags.
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCFE ndd form.
instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
  ins_encode %{
    __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// ne-CMove on float-compare flags: unordered (parity) counts as not-equal,
// so move on parity OR not-equal.
instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9462
// Conditional move of pointers: full 64-bit cmovq/ecmovq; otherwise the
// structure parallels the cmovI/cmovN rules above.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd (APX three-operand form).
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move — unsigned flags.
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

// Conditional move ndd — unsigned flags.
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// UCF form delegates to the unsigned form via expansion.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// UCFE ndd form.
instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// ne-CMove on float-compare flags: unordered (parity) counts as not-equal.
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9568
9569 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9570 %{
9571 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9572 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9573
9574 ins_cost(100); // XXX
9575 format %{ "setbn$cop $dst\t# signed, long" %}
9576 ins_encode %{
9577 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9578 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9579 %}
9580 ins_pipe(ialu_reg);
9581 %}
9582
9583 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9584 %{
9585 predicate(!UseAPX);
9586 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9587
9588 ins_cost(200); // XXX
9589 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9590 ins_encode %{
9591 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9592 %}
9593 ins_pipe(pipe_cmov_reg); // XXX
9594 %}
9595
9596 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9597 %{
9598 predicate(UseAPX);
9599 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9600
9601 ins_cost(200);
9602 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9603 ins_encode %{
9604 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9605 %}
9606 ins_pipe(pipe_cmov_reg);
9607 %}
9608
9609 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9610 %{
9611 predicate(!UseAPX);
9612 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9613
9614 ins_cost(200); // XXX
9615 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9616 ins_encode %{
9617 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9618 %}
9619 ins_pipe(pipe_cmov_mem); // XXX
9620 %}
9621
9622 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9623 %{
9624 predicate(UseAPX);
9625 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9626
9627 ins_cost(200);
9628 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9629 ins_encode %{
9630 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9631 %}
9632 ins_pipe(pipe_cmov_mem);
9633 %}
9634
9635 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9636 %{
9637 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9638 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9639
9640 ins_cost(100); // XXX
9641 format %{ "setbn$cop $dst\t# unsigned, long" %}
9642 ins_encode %{
9643 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9644 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9645 %}
9646 ins_pipe(ialu_reg);
9647 %}
9648
9649 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9650 %{
9651 predicate(!UseAPX);
9652 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9653
9654 ins_cost(200); // XXX
9655 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9656 ins_encode %{
9657 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9658 %}
9659 ins_pipe(pipe_cmov_reg); // XXX
9660 %}
9661
// Conditional move of a long, register-register, unsigned condition.
// APX NDD (three-operand) ECMOVQ form: selects between $src1 and $src2
// into $dst without destroying either source.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9674
// As cmovL_imm_01U, but for the carry-flag-only unsigned flag registers
// (UCF / UCFE) produced by float/double compares: 0/1 result materialized
// with setb on the negated condition; predicate requires the second data
// input of the CMove to be the constant zero.
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Same as above for the UCFE (equality-capable) flag register variant.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9702
// UCF flag variant of the register-register long cmove: delegates to
// cmovL_regU via expand, since the same CMOVQ encoding applies.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

// UCFE flag variant using the APX NDD (three-operand) ECMOVQ form.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9723
// Unordered-or-not-equal cmove after a float/double compare: an FP "ne"
// must also take the move when the comparison is unordered (parity set),
// hence the two-instruction cmovp + cmovne sequence.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9753
// Conditional move of a long from memory, unsigned condition, legacy
// two-operand form (non-APX only).
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

// UCF flag variant: same encoding, delegates to cmovL_memU via expand.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}

// APX NDD (three-operand) form of the unsigned long cmove from memory.
instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// APX NDD form for the UCFE flag register variant.
instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9800
// Conditional move of a float: there is no XMM cmov instruction, so emit
// a short forward branch around a MOVSS.  The branch condition is the
// inversion of the cmov condition (cmpcode ^ 1).  Signed flag variant.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// Same branch-around-MOVSS pattern for unsigned conditions.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// UCF flag variant: delegates to cmovF_regU via expand.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}

// UCFE flag variant: same branch-around-MOVSS pattern.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
9863
// Conditional move of a double: as for float, emitted as a short forward
// branch (inverted condition) around a MOVSD.  Signed flag variant.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// Same branch-around-MOVSD pattern for unsigned conditions.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// UCF flag variant: delegates to cmovD_regU via expand.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}

// UCFE flag variant: same branch-around-MOVSD pattern.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
9926
9927 //----------Arithmetic Instructions--------------------------------------------
9928 //----------Addition Instructions----------------------------------------------
9929
// Integer add, register-register, legacy two-operand ADDL (dst += src).
// The flag() line records which condition codes the instruction defines,
// for flag-reuse optimizations.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Integer add, APX NDD (three-operand) EADDL: dst = src1 + src2, sources
// preserved.  Both operands are marked demotable to the legacy form.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
9956
// Integer add of an immediate, legacy two-operand form (dst += imm).
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

// Integer add of an immediate, APX NDD form: dst = src1 + imm.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

// Integer add of a loaded value and an immediate, APX NDD form:
// dst = mem + imm in a single instruction.
instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
9998
// Integer add of a loaded value, legacy two-operand form (dst += mem).
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Integer add of a loaded value, APX NDD form: dst = src1 + mem.
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write add to memory: mem += src.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write add of an immediate to memory: mem += imm.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);


  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10057
// Add of the constant 1 matched to INCL (legacy form, non-APX).
// Note: inc/dec do not set the carry flag, hence no flag() line.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD increment: dst = src + 1 with src preserved.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD increment of a loaded value: dst = mem + 1.
instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ eincl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// Read-modify-write increment in memory: mem += 1.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10111
// XXX why does that use AddI
// Add of the constant -1 matched to DECL (legacy form, non-APX).
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD decrement: dst = src - 1 with src preserved.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD decrement of a loaded value: dst = mem - 1.
instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ edecl($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddI
// Read-modify-write decrement in memory: mem -= 1.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10167
// Shift-and-add combinations matched to a single LEAL, flag-free.
// Only selected when the CPU executes multi-operand LEA fast
// (supports_fast_2op_lea / supports_fast_3op_lea).
//
// dst = (index << scale) + disp
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst = base + index + disp
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst = base + (index << scale).  Base register class excludes rbp/r13,
// whose encodings would force a displacement byte.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst = base + (index << scale) + disp
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10218
// Long add, register-register, legacy two-operand ADDQ (dst += src).
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Long add, APX NDD (three-operand) EADDQ: dst = src1 + src2.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Long add of a 32-bit sign-extended immediate, legacy form.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10260
// Long add of a 32-bit immediate, APX NDD form: dst = src1 + imm.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

// Long add of a loaded value and a 32-bit immediate, APX NDD form:
// dst = mem + imm in a single instruction.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}

// Long add of a loaded value, legacy two-operand form (dst += mem).
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long add of a loaded value, APX NDD form: dst = src1 + mem.
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10318
// Read-modify-write long add to memory: mem += src.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Read-modify-write long add of a 32-bit immediate to memory: mem += imm.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10346
// Long add of the constant 1 matched to INCQ (legacy form, non-APX).
// Note: inc/dec do not set the carry flag, hence no flag() line.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
10359
// APX NDD long increment: dst = src + 1 with src preserved.
// Fix: the source operand must use the long register class (rRegL); it was
// declared rRegI (int class), which is inconsistent with the AddL ideal
// match, with the quadword EINCQ encoding applied to it, and with the
// sibling decL_rReg_ndd rule below.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  // src may be demoted onto dst by the NDD demotion optimization.
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10373
// APX NDD long increment of a loaded value: dst = mem + 1.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// Read-modify-write long increment in memory: mem += 1.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10400
// XXX why does that use AddL
// Long add of the constant -1 matched to DECQ (legacy form, non-APX).
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD long decrement: dst = src - 1 with src preserved.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// APX NDD long decrement of a loaded value: dst = mem - 1.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ edecq($dst$$Register, $src$$Address, false);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddL
// Read-modify-write long decrement in memory: mem -= 1.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}
10456
// Long shift-and-add combinations matched to a single LEAQ (flag-free),
// mirroring the leaI_* rules above.  Only selected when the CPU executes
// multi-operand LEA fast.
//
// dst = (index << scale) + disp
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst = base + index + disp
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst = base + (index << scale).  Base register class excludes rbp/r13,
// whose encodings would force a displacement byte.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst = base + (index << scale) + disp
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10507
// Pointer add: ptr register plus long offset register (dst += src).
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Pointer add of a 32-bit sign-extended immediate offset (dst += imm).
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10533
10534 // XXX addP mem ops ????
10535
// CheckCastPP is a type-system-only node: no code is emitted (size 0).
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}

// CastPP likewise compiles to nothing; it only constrains the type.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10555
// CastII: normally emits nothing; with -XX:VerifyConstraintCasts the
// checked variant below emits a runtime range check instead.
instruct castII(rRegI dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// Debug variant: verify at runtime that $dst lies in the node's int type
// range (clobbers flags).
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// CastLL: normally emits nothing.
instruct castLL(rRegL dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// Debug variant when the type bounds fit in 32-bit immediates: no temp
// register needed (noreg passed).
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
  %}
  ins_pipe(pipe_slow);
%}

// Debug variant for bounds outside the 32-bit immediate range: needs a
// TEMP register to materialize the bound.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10618
// Floating-point cast nodes are type-system-only: no code emitted.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// Half-float cast: no code emitted.
instruct castHH(regF dst)
%{
  match(Set dst (CastHH dst));

  size(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// Double cast: no code emitted.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
10651
10652 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Strong and weak compare-and-swap, returning a success flag in $res.
// Pattern for all sizes: lock cmpxchg leaves the comparison result in ZF,
// which setcc materializes into $res.  oldval is pinned to rax (the
// implicit cmpxchg operand) and is clobbered on failure (KILL oldval).
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  // Only when no GC barrier is attached; barrier-emitting variants are
  // presumably supplied by the GC's own .ad extension — confirm there.
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 64-bit long CAS.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 32-bit int CAS.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 8-bit byte CAS.
instruct compareAndSwapB(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 16-bit short CAS.
instruct compareAndSwapS(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Compressed-oop (narrow) CAS; 32-bit cmpxchg on the encoded value.
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10773
// Compare-and-exchange: like compareAndSwap but the result is the value
// previously in memory, which lock cmpxchg leaves in rax ($oldval is both
// input and output — note "Set oldval" in the match rule), so no setcc is
// needed.
instruct compareAndExchangeB(
                          memory mem_ptr,
                          rax_RegI oldval, rRegI newval,
                          rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 16-bit short compare-and-exchange.
instruct compareAndExchangeS(
                          memory mem_ptr,
                          rax_RegI oldval, rRegI newval,
                          rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 32-bit int compare-and-exchange.
instruct compareAndExchangeI(
                          memory mem_ptr,
                          rax_RegI oldval, rRegI newval,
                          rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// 64-bit long compare-and-exchange.
instruct compareAndExchangeL(
                          memory mem_ptr,
                          rax_RegL oldval, rRegL newval,
                          rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Compressed-oop compare-and-exchange (no GC barrier attached).
instruct compareAndExchangeN(
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-exchange (no GC barrier attached).
instruct compareAndExchangeP(
                          memory mem_ptr,
                          rax_RegP oldval, rRegP newval,
                          rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10876
// Atomic fetch-and-add (GetAndAdd*).  Each size has three variants:
//  *_reg_no_res / *_imm_no_res: the old value is unused
//    (result_not_used()), so a cheaper "lock add" suffices;
//  xadd*: the old value is needed, so "lock xadd" returns it in $newval.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "xaddb_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  // Gated on UseStoreImmI16 — presumably to avoid 16-bit immediate
  // (length-changing prefix) penalties on some CPUs; confirm against the
  // UseStoreImmI16 flag definition.
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "xaddw_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "xaddl_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  // immL32: 64-bit add only accepts sign-extended 32-bit immediates.
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock $mem, $add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}

instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);
  format %{ "xaddq_lock $mem, $newval" %}
  ins_encode %{
    __ lock();
    __ xaddq($mem$$Address, $newval$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11020
// Atomic exchange (GetAndSet*).  XCHG with a memory operand is implicitly
// locked on x86, so no explicit lock() prefix is emitted and no flags are
// clobbered.
instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11047
// Atomic 64-bit exchange; XCHG with a memory operand is implicitly locked.
instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  // Fixed disassembly annotation: was "XCHGL", but the emitted encoding
  // is the 64-bit xchgq.
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11056
// Atomic pointer exchange; only matched when no GC barrier is attached
// (barrier_data() == 0).
instruct xchgP( memory mem, rRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  format %{ "XCHGQ $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11066
// Atomic compressed-oop exchange (32-bit xchg on the encoded value);
// only matched when no GC barrier is attached.
instruct xchgN( memory mem, rRegN newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  // Fixed disassembly annotation: was "$newval,$mem]" with an unbalanced
  // bracket; now matches the sibling xchg formats.
  format %{ "XCHGL $newval,[$mem]" %}
  match(Set newval (GetAndSetN mem newval));
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
11076
11077 //----------Abs Instructions-------------------------------------------
11078
11079 // Integer Absolute Instructions
// Integer Absolute Instructions
// Branch-free abs: dst = 0 - src, then if the subtraction went negative
// (src was positive) cmov copies src back.  TEMP dst keeps dst and src in
// distinct registers since dst is clobbered before src is read.
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl $dst, $dst\t# abs int\n\t"
            "subl $dst, $src\n\t"
            "cmovll $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subl($dst$$Register, $src$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Absolute Instructions
// Same scheme as absI; the 32-bit xorl zero-extends to all 64 bits.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl $dst, $dst\t# abs long\n\t"
            "subq $dst, $src\n\t"
            "cmovlq $dst, $src" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
    __ subq($dst$$Register, $src$$Register);
    __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}
11112
11113 //----------Subtraction Instructions-------------------------------------------
11114
11115 // Integer Subtraction Instructions
// Integer Subtraction Instructions
// Two families: legacy two-operand forms (!UseAPX) and APX NDD
// (new-data-destination) three-operand esub* forms (UseAPX).  The flag()
// lists advertise which EFLAGS bits each instruction defines; the
// Flag_ndd_demotable_* hints mark NDD forms that can be demoted to the
// legacy encoding when dst aliases the named source operand.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// reg - imm, NDD form.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// mem - imm, NDD form (not demotable: opr1 is memory).
instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst -= load(src), legacy two-operand form.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// reg - load(mem), NDD form.
instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// load(mem) - reg, NDD form (not demotable: opr1 is memory).
instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write directly in memory: mem -= reg.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11230
// Long subtraction — same variant structure as the int family above:
// legacy two-operand subq (!UseAPX) and APX NDD three-operand esubq.
// Immediates use immL32 since subq only takes sign-extended imm32.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// reg - imm32, NDD form.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// load(mem) - imm32, NDD form (not demotable: opr1 is memory).
instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}

// dst -= load(src), legacy two-operand form.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// reg - load(mem), NDD form.
instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  ins_cost(150);
  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// load(mem) - reg, NDD form (not demotable: opr1 is memory).
instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Read-modify-write directly in memory: mem -= reg.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11345
11346 // Subtract from a pointer
11347 // XXX hmpf???
// Matches ptr + (0 - int), i.e. subtracting an int offset from a
// pointer, and emits a single 64-bit subtract.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq $dst, $src\t# ptr - int" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
11359
// Integer negation.  Matched both as (SubI 0 x) and as an explicit NegI
// node; legacy one-operand negl (!UseAPX) vs APX NDD two-operand enegl.
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// NegI node, legacy form.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// NegI node, APX NDD form.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegl $dst, $src\t# int ndd" %}
  ins_encode %{
    __ enegl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Negate directly in memory: mem = 0 - load(mem).
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11428
// Long negation, matched as (SubL 0 x); legacy negq vs APX NDD enegq.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
11456
// NegL node, legacy one-operand form.
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  // Fixed disassembly annotation: was "# int" on a 64-bit long negate.
  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11470
// NegL node, APX NDD form.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "enegq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ enegq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}

// Negate a long directly in memory: mem = 0 - load(mem).
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}
11497
11498 //----------Multiplication/Division Instructions-------------------------------
11499 // Integer Multiplication Instructions
11500 // Multiply Register
11501
// Integer multiply.  Legacy two-operand imull (!UseAPX), APX NDD
// three-operand eimull (UseAPX), plus reg*imm and mem forms shared by
// both modes.  imul is routed to the alu0 pipe classes.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  // Multiplication commutes, so either source may alias dst (demotable
  // on opr1 or opr2).
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand reg*imm imul is available in both modes.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(350);
  format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

// dst = dst*src1 + src2*src3, expanded into two multiplies and an add
// over existing instructs rather than emitting code directly.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
           mulI_rReg(src2, src3, cr);
           addI_rReg(dst, src2, cr); %}
%}
11595
// Long multiply — same variant structure as the int family above.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  // Commutative, so demotable on either source operand.
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Three-operand reg*imm32 imulq, available in both modes.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11651
// Long multiply, register by in-memory operand, APX NDD three-operand form:
// dst = src1 * [src2].
instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 (LoadL src2)));
  effect(KILL cr);   // multiply clobbers the condition flags
  // Both operand positions may be demoted to the legacy two-operand encoding.
  flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(350);
  // Fixed disassembly comment: was "$src2 \t# long" (stray space before tab,
  // missing "ndd" tag) — now consistent with the other e-prefixed NDD formats.
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11666
// Long multiply of an in-memory operand by a 32-bit sign-extended immediate.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11679
// Signed high 64 bits of a 128-bit product: one-operand imulq computes
// RDX:RAX = RAX * src; the high half lands in RDX (= dst).
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);   // RAX is both input and clobbered output

  ins_cost(300);
  format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned high 64 bits of a 128-bit product, via one-operand mulq.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11705
// Signed int division: rax = rax / div. The emitted sequence special-cases
// min_jint / -1 (which would overflow and raise #DE in hardware) so that it
// yields quotient min_jint with rdx zeroed.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);   // idivl leaves the remainder in rdx

  ins_cost(30*100+10*100); // XXX rough estimate, not measured
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Signed long division: rax = rax / div, guarding min_jlong / -1 likewise.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX rough estimate, not measured
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned int division: rax = rax /u div (no overflow special case needed).
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned long division: rax = rax /u div.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
    __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11770
// Integer DIVMOD with Register, both quotient and mod results
// (quotient in rax, remainder in rdx — both are live outputs here, so
// neither is KILLed). Same min_jint / -1 guard as the plain divide.
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX rough estimate, not measured
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}

// Long DIVMOD with Register, both quotient and mod results.
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX rough estimate, not measured
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}

// Unsigned integer DIVMOD with Register, both quotient and mod results
// (tmp is a scratch register for the runtime helper sequence).
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
  %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned long DIVMOD with Register, both quotient and mod results.
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
  %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11845
// Signed int remainder: rdx = rax % div. Uses the same guarded idivl
// sequence as division; the remainder lands in rdx.
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);   // rax receives the (discarded) quotient

  ins_cost(300); // XXX rough estimate, not measured
  format %{ "cmpl rax, 0x80000000\t# irem\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Signed long remainder: rdx = rax % div.
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX rough estimate, not measured
  format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned int remainder: rdx = rax %u div.
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Unsigned long remainder: rdx = rax %u div.
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11910
// Integer Shift Instructions
// Shift Left by one, two, three
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by one, two, three — APX NDD form: dst = src << shift.
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);   // demotable when dst == src

  format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate — APX NDD form.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left of an in-memory operand by 8-bit immediate — APX NDD form
// (loads, shifts, and writes the register destination in one instruction).
instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate, read-modify-write on memory.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Shift Left by variable count; the count is implicitly taken from CL,
// hence the one-argument encoder call and the rcx_RegI operand.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable (CL), read-modify-write on memory.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand shift: shlxl takes the count in any register and
// does not write flags, so no rFlagsReg effect is declared.
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 shift of an in-memory operand.
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shlxl $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12047
// Arithmetic Shift Right by 8-bit immediate
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate — APX NDD form: dst = src >> shift.
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);   // demotable when dst == src

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right of an in-memory operand — APX NDD form.
instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate, read-modify-write on memory.
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable; count implicitly in CL.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable (CL), read-modify-write on memory.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand arithmetic shift; sarxl does not write flags.
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 arithmetic shift of an in-memory operand.
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12154
// Logical Shift Right by 8-bit immediate
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate — APX NDD form: dst = src >>> shift.
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);   // demotable when dst == src

  format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right of an in-memory operand — APX NDD form.
instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate, read-modify-write on memory.
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable; count implicitly in CL.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable (CL), read-modify-write on memory.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand logical shift; shrxl does not write flags.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 logical shift of an in-memory operand.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12261
// Long Shift Instructions
// Shift Left by one, two, three
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by one, two, three — APX NDD form: dst = src << shift.
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);   // demotable when dst == src

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate — APX NDD form.
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left of an in-memory operand by 8-bit immediate — APX NDD form.
instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate, read-modify-write on memory.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Shift Left by variable; count implicitly in CL.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable (CL), read-modify-write on memory.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand shift; shlxq does not write flags.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 shift of an in-memory operand.
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12398
// Arithmetic Shift Right by immediate. Note: these take a plain immI and
// mask the count to 6 bits themselves (& 0x3F), matching hardware behavior.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by immediate — APX NDD form: dst = src >> shift.
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);   // demotable when dst == src

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right of an in-memory operand — APX NDD form.
instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by immediate, read-modify-write on memory.
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable; count implicitly in CL.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable (CL), read-modify-write on memory.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand arithmetic shift; sarxq does not write flags.
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 arithmetic shift of an in-memory operand.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12505
// Logical Shift Right by 8-bit immediate
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate — APX NDD form: dst = src >>> shift.
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);   // demotable when dst == src

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right of an in-memory operand — APX NDD form.
instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate, read-modify-write on memory.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable; count implicitly in CL.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable (CL), read-modify-write on memory.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

// BMI2 three-operand logical shift; shrxq does not write flags.
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// BMI2 logical shift of an in-memory operand.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12612
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode; it is
// strength-reduced here to a single sign-extending byte move.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode; it is
// strength-reduced here to a single sign-extending word move.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12638
12639 // ROL/ROR instructions
12640
12641 // Rotate left by constant.
12642 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12643 %{
12644 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12645 match(Set dst (RotateLeft dst shift));
12646 effect(KILL cr);
12647 format %{ "roll $dst, $shift" %}
12648 ins_encode %{
12649 __ roll($dst$$Register, $shift$$constant);
12650 %}
12651 ins_pipe(ialu_reg);
12652 %}
12653
12654 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12655 %{
12656 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12657 match(Set dst (RotateLeft src shift));
12658 format %{ "rolxl $dst, $src, $shift" %}
12659 ins_encode %{
12660 int shift = 32 - ($shift$$constant & 31);
12661 __ rorxl($dst$$Register, $src$$Register, shift);
12662 %}
12663 ins_pipe(ialu_reg_reg);
12664 %}
12665
12666 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12667 %{
12668 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12669 match(Set dst (RotateLeft (LoadI src) shift));
12670 ins_cost(175);
12671 format %{ "rolxl $dst, $src, $shift" %}
12672 ins_encode %{
12673 int shift = 32 - ($shift$$constant & 31);
12674 __ rorxl($dst$$Register, $src$$Address, shift);
12675 %}
12676 ins_pipe(ialu_reg_mem);
12677 %}
12678
12679 // Rotate Left by variable
12680 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12681 %{
12682 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12683 match(Set dst (RotateLeft dst shift));
12684 effect(KILL cr);
12685 format %{ "roll $dst, $shift" %}
12686 ins_encode %{
12687 __ roll($dst$$Register);
12688 %}
12689 ins_pipe(ialu_reg_reg);
12690 %}
12691
12692 // Rotate Left by variable
// APX New-Data-Destination variant: rotate src left by CL into dst
// without clobbering src.  Flag_ndd_demotable_opr1 lets the allocator
// demote to the legacy two-operand form when dst == src.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    // Shift count is implicit in CL; trailing 'false' is presumably the
    // no_flags argument of the eevex encoder — TODO confirm.
    __ eroll($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12706
12707 // Rotate Right by constant.
// Rotate int right by a constant, read-modify-write legacy form
// (no BMI2); ROR writes the flags, hence KILL cr.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12719
12720 // Rotate Right by constant.
// Rotate int right by a constant using BMI2 RORX directly
// (non-destructive, flags untouched — no KILL cr).
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12731
// Rotate a loaded int right by a constant, fused load + RORX form.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12743
12744 // Rotate Right by variable
// Rotate int right by a variable count held in RCX (ROR uses CL
// implicitly in the one-operand encoding).
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    // Single-register rorl() encodes "ror dst, cl".
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12756
12757 // Rotate Right by variable
// APX NDD variant: rotate src right by CL into dst without clobbering
// src; demotable to the legacy form when dst == src.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    // Shift count is implicit in CL; trailing 'false' presumably selects
    // flag behavior of the eevex encoding — TODO confirm.
    __ erorl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12771
12772 // Rotate Left by constant.
// Rotate long left by a constant, read-modify-write legacy form
// (no BMI2); ROLQ writes the flags, hence KILL cr.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12784
// Rotate long left by a constant using BMI2 RORX with a complemented
// shift count (non-destructive, flags untouched).
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // rol(x, s) == ror(x, 64 - s) for 64-bit operands.
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
12796
// Rotate a loaded long left by a constant, fused load + RORX form.
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // rol(x, s) == ror(x, 64 - s); RORX only rotates right.
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}
12809
12810 // Rotate Left by variable
// Rotate long left by a variable count held in RCX (ROLQ uses CL
// implicitly in the one-operand encoding).
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    // Single-register rolq() encodes "rol dst, cl".
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12823
12824 // Rotate Left by variable
// APX NDD variant: rotate long src left by CL into dst without
// clobbering src; demotable to the legacy form when dst == src.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    // Shift count is implicit in CL.
    __ erolq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12838
12839 // Rotate Right by constant.
// Rotate long right by a constant, read-modify-write legacy form
// (no BMI2); RORQ writes the flags, hence KILL cr.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12851
12852 // Rotate Right by constant
// Rotate long right by a constant using BMI2 RORX directly.
// NOTE(review): unlike rolL_immI8 and rorI_immI8 above, this predicate
// carries no !UseAPX guard — confirm whether the asymmetry is intended.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12863
// Rotate a loaded long right by a constant, fused load + RORX form.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12875
12876 // Rotate Right by variable
// Rotate long right by a variable count held in RCX (RORQ uses CL
// implicitly in the one-operand encoding).
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    // Single-register rorq() encodes "ror dst, cl".
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12888
12889 // Rotate Right by variable
// APX NDD variant: rotate long src right by CL into dst without
// clobbering src; demotable to the legacy form when dst == src.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    // Shift count is implicit in CL.
    __ erorq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12903
12904 //----------------------------- CompressBits/ExpandBits ------------------------
12905
// Long CompressBits via BMI2 PEXT: gathers the bits of src selected by
// mask into the low-order bits of dst.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12915
// Long ExpandBits via BMI2 PDEP: scatters the low-order bits of src
// into the bit positions selected by mask.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12925
// Long CompressBits with the mask taken straight from memory
// (fused LoadL + PEXT).
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12935
// Long ExpandBits with the mask taken straight from memory
// (fused LoadL + PDEP).
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
12945
12946
12947 // Logical Instructions
12948
12949 // Integer Logical Instructions
12950
12951 // And Instructions
12952 // And Register with Register
// Two-operand int AND, read-modify-write on dst (legacy, non-APX).
// The flag() list records how AND defines EFLAGS so later flag users
// can reuse them instead of re-testing.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12966
12967 // And Register with Register using New Data Destination (NDD)
// Three-operand int AND using the APX New-Data-Destination encoding;
// neither source is clobbered.  Both operands are demotable, so the
// allocator may fall back to the two-operand form when dst aliases one.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}
12982
12983 // And Register with Immediate 255
// Strength-reduce (x & 0xFF) to a zero-extending byte move; MOVZX does
// not write flags, so no KILL cr is needed.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12994
12995 // And Register with Immediate 255 and promote to long
// (x & 0xFF) followed by int->long conversion collapses to one MOVZX:
// a 32-bit movzbl implicitly zeroes the upper 32 bits on x86-64.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13006
13007 // And Register with Immediate 65535
// Strength-reduce (x & 0xFFFF) to a zero-extending word move; MOVZX
// leaves flags untouched.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13018
13019 // And Register with Immediate 65535 and promote to long
// (x & 0xFFFF) plus int->long conversion collapses to one MOVZX; the
// 32-bit write implicitly zeroes the upper half of the 64-bit register.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13030
13031 // Can skip int2long conversions after AND with small bitmask
// Fold int->long conversion of (x & (2^k - 1)) into a single BMI2 BZHI:
// the mask is a power-of-two-minus-one constant, so masking is the same
// as zeroing all bits from position k upward.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
  ins_encode %{
    // BZHI takes the bit index in a register, so materialize k = log2(mask+1).
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13045
13046 // And Register with Immediate
// Int AND with an immediate, read-modify-write on dst (legacy, non-APX).
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13060
// APX NDD int AND of a register with an immediate into a separate
// destination; src1 (the register operand) is demotable.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13074
// APX NDD int AND of a loaded value with an immediate into a register.
// ins_cost(150) matches the other memory-operand forms in this section
// (andI_rReg_mem, andI_rReg_rReg_mem_ndd, xorI_rReg_mem_imm_ndd) so the
// matcher prefers the pure register forms when both apply.
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI (LoadI src1) src2));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13088
13089 // And Register with Memory
// Int AND of dst with a value loaded from memory (legacy, non-APX);
// costed at 150 so the register-register form is preferred when possible.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13104
// APX NDD int AND of a register with a loaded value into a separate
// destination; both operands demotable to the two-operand form.
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AndI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13119
13120 // And Memory with Register
// Byte-sized AND directly into memory (load-op-store collapsed into one
// RMW instruction); matches the StoreB(AndI(LoadB ...)) idiom.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13134
// Int AND of a register into memory (RMW form collapsing
// load + and + store).
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13148
13149 // And Memory with Immediate
// Int AND of an immediate into memory (RMW form); slightly cheaper than
// the register RMW since no source register is consumed.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13163
13164 // BMI1 instructions
// BMI1 ANDN: matches (~src1 & load(src2)) — the XOR with -1 is how the
// IR spells bitwise NOT — and emits a single non-destructive andn.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnl $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13179
// BMI1 ANDN, register-register form: dst = ~src1 & src2 in one
// non-destructive instruction.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13193
// BMI1 BLSI (isolate lowest set bit): matches the (-x & x) idiom.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13207
// BMI1 BLSI with a memory operand: (-load(x) & load(x)) — both loads
// are the same address, so a single fused load/blsi suffices.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsil $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13222
// BMI1 BLSMSK (mask up to and including lowest set bit): matches the
// ((x - 1) ^ x) idiom on a loaded value.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13238
// BMI1 BLSMSK, register form: dst = (src - 1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13254
// BMI1 BLSR (reset lowest set bit): matches the ((x - 1) & x) idiom,
// register-register form.
// ins_pipe fixed to ialu_reg to match the other reg-reg BMI1 forms
// (blsiI_rReg_rReg, blsmskI_rReg_rReg); it was swapped with the memory
// form below.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13270
// BMI1 BLSR with a memory operand: ((load(x) - 1) & load(x)) fused into
// one load/blsr.
// ins_pipe fixed to ialu_reg_mem to match the other memory BMI1 forms
// (blsiI_rReg_mem, blsmskI_rReg_mem); it was swapped with the
// register-register form above.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
13287
13288 // Or Instructions
13289 // Or Register with Register
// Two-operand int OR, read-modify-write on dst (legacy, non-APX).
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13303
13304 // Or Register with Register using New Data Destination (NDD)
// Three-operand int OR using the APX NDD encoding; neither source is
// clobbered and both are demotable to the two-operand form.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13318
13319 // Or Register with Immediate
// Int OR with an immediate, read-modify-write on dst (legacy, non-APX).
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13333
// APX NDD int OR of a register with an immediate into a separate
// destination; the register operand is demotable.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13347
// Commuted form of the above: immediate on the left of the OrI node.
// OR is commutative, so the encoding swaps the operands and emits
// eorl(dst, src2-register, src1-constant).
// NOTE(review): the flag names opr1 as demotable while the register is
// operand 2 of the match — confirm the flag refers to the register operand.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13361
// APX NDD int OR of a loaded value with an immediate into a register.
// ins_cost(150) matches the other memory-operand forms in this section
// (orI_rReg_mem, orI_rReg_rReg_mem_ndd, xorI_rReg_mem_imm_ndd) so the
// register forms are preferred when both match.
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI (LoadI src1) src2));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13375
13376 // Or Register with Memory
// Int OR of dst with a value loaded from memory (legacy, non-APX).
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13391
// APX NDD int OR of a register with a loaded value into a separate
// destination; both operands demotable.
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13406
13407 // Or Memory with Register
// Byte-sized OR directly into memory (load-op-store RMW form).
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13421
// Int OR of a register into memory (RMW form).
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13435
13436 // Or Memory with Immediate
// Int OR of an immediate into memory (RMW form).
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13450
13451 // Xor Instructions
13452 // Xor Register with Register
// Two-operand int XOR, read-modify-write on dst (legacy, non-APX).
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13466
13467 // Xor Register with Register using New Data Destination (NDD)
// Three-operand int XOR using the APX NDD encoding; both sources
// preserved and demotable.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13481
13482 // Xor Register with Immediate -1
// Strength-reduce (x ^ -1) to NOT; NOT does not modify flags, so no
// KILL cr is needed (unlike XOR).
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst imm));

  format %{ "notl $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13494
// APX NDD NOT: (src ^ -1) into a separate destination without
// clobbering src; no flags written, so no KILL cr.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  match(Set dst (XorI src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13507
13508 // Xor Register with Immediate
// Int XOR with an immediate (legacy, non-APX).  The -1 case is excluded
// so that xorI_rReg_im1 (NOT) is always chosen for it, independent of cost.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13523
// APX NDD int XOR of a register with an immediate; the -1 case is
// excluded so the NDD NOT form above always wins for it.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  match(Set dst (XorI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13538
13539 // Xor Memory with Immediate
// APX NDD int XOR of a loaded value with an immediate into a register;
// costed at 150 so register forms are preferred.
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI (LoadI src1) src2));
  effect(KILL cr);
  ins_cost(150);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13554
13555 // Xor Register with Memory
// Int XOR of dst with a value loaded from memory (legacy, non-APX).
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13570
// APX NDD int XOR of a register with a loaded value into a separate
// destination; both operands demotable.
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13585
13586 // Xor Memory with Register
// Byte-sized XOR directly into memory (load-op-store RMW form).
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13600
// Int XOR of a register into memory (RMW form).
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13614
13615 // Xor Memory with Immediate
13616 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13617 %{
13618 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13619 effect(KILL cr);
13620 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13621
13622 ins_cost(125);
13623 format %{ "xorl $dst, $src\t# int" %}
13624 ins_encode %{
13625 __ xorl($dst$$Address, $src$$constant);
13626 %}
13627 ins_pipe(ialu_mem_imm);
13628 %}
13629
13630
13631 // Long Logical Instructions
13632
13633 // And Instructions
13634 // And Register with Register
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  // Non-APX two-address form: dst doubles as the first input.
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  // AND sets SF/ZF/PF and always clears OF/CF.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Register using New Data Destination (NDD)
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  // APX NDD three-operand form; AndL is commutative, so either register
  // source may be demoted onto dst (both opr flags set).
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}
13664
// And Register with Immediate 255
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  // Strength-reduce (x & 0xFF) to a byte zero-extend. No flags are written,
  // hence no rFlagsReg effect here.
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // movzbl zeroes out the upper 32-bit and does not need REX.W
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  // Strength-reduce (x & 0xFFFF) to a word zero-extend; flags untouched.
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // movzwl zeroes out the upper 32-bit and does not need REX.W
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13690
// And Register with Immediate
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Non-APX two-address form with a sign-extended 32-bit immediate.
  predicate(!UseAPX);
  match(Set dst (AndL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  // APX NDD form; only the register source (ideal input 1) is demotable —
  // the immediate cannot be.
  predicate(UseAPX);
  match(Set dst (AndL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  // APX NDD form with the load folded: dst = mem & imm in one instruction.
  predicate(UseAPX);
  match(Set dst (AndL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13733
// And Register with Memory
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  // Non-APX two-address form with a folded load.
  predicate(!UseAPX);
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  // APX NDD three-operand form with a folded load.
  // NOTE(review): opr2 names the memory input here; confirm the NDD
  // demotion pass expects that for load operands.
  predicate(UseAPX);
  match(Set dst (AndL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13764
// And Memory with Register
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  // RMW form: load, and with register, store back to the same address.
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  // RMW form with a sign-extended 32-bit immediate.
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13794
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  // (Bit index > 30: for lower bits the sign-extended andq imm32 suffices.)
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  // RMW single-bit clear: mem &= ~(1 << k) becomes btrq [mem], k.
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
13811
// BMI1 instructions
// ANDN computes dst = ~src1 & src2; matched from the (x ^ -1) & y shape.
// Note the flag list omits the parity flag relative to plain AND.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  // Register-register variant of ANDN.
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // NOTE(review): ialu_reg_mem pipe on a reg-only form — verify intentional.
  ins_pipe(ialu_reg_mem);
%}
13841
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  // BLSI isolates the lowest set bit: dst = src & (0 - src).
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  // Memory-source BLSI; both LoadL nodes must be the same load for this
  // pattern to match.
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsiq $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13870
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  // BLSMSK builds a mask up to and including the lowest set bit:
  // dst = src ^ (src - 1). Memory-source variant.
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  // Register-source BLSMSK: dst = src ^ (src - 1).
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13902
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  // BLSR clears the lowest set bit: dst = src & (src - 1).
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  // Memory-source BLSR; both LoadL nodes must be the same load.
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // NOTE(review): ialu_reg pipe despite a memory source (blsiL_rReg_mem
  // uses ialu_reg_mem) — verify intentional.
  ins_pipe(ialu_reg);
%}
13935
// Or Instructions
// Or Register with Register
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  // Non-APX two-address form: dst doubles as the first input.
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  // OR sets SF/ZF/PF and always clears OF/CF.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Register using New Data Destination (NDD)
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  // APX NDD three-operand form; OrL is commutative, so either register
  // source may be demoted onto dst.
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);

  %}
  ins_pipe(ialu_reg_reg);
%}
13967
// Use any_RegP to match R15 (TLS register) without spilling.
// Or a pointer (reinterpreted as long via CastP2X) into a long register.
// Gated on !UseAPX so it does not compete with the NDD variant below —
// every other non-NDD/NDD instruct pair in this file is predicated this way.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);
  // OR sets SF/ZF/PF and always clears OF/CF.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13980
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  // APX NDD variant of orL_rReg_castP2X. The UseAPX predicate is required:
  // without it the matcher could select this rule on non-APX hardware,
  // where the extended eorq encoding is not available.
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13992
// Or Register with Immediate
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Non-APX two-address form with a sign-extended 32-bit immediate.
  predicate(!UseAPX);
  match(Set dst (OrL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  // APX NDD form, register OR immediate; only the register source
  // (ideal input 1) is demotable.
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14021
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  // APX NDD form with the operands swapped: immediate first, register second.
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  // The register source is ideal input 2 (input 1 is the immediate, which can
  // never be the operand an NDD instruction is demoted onto), so the demotion
  // hint must be opr2 — mirroring orL_rReg_rReg_imm_ndd, which marks its
  // register input (input 1) as opr1.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14035
// Or Memory operand with Immediate into Register (APX NDD, load folded)
instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  // dst = mem | imm in a single eorq; non-destructive, so no demotion flags.
  predicate(UseAPX);
  match(Set dst (OrL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Or Register with Memory
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  // Non-APX two-address form with a folded load.
  predicate(!UseAPX);
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  // APX NDD three-operand form with a folded load.
  predicate(UseAPX);
  match(Set dst (OrL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14081
// Or Memory with Register
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  // RMW form: load, or with register, store back to the same address.
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Or Memory with Immediate
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  // RMW form with a sign-extended 32-bit immediate.
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14111
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  // (Bit index > 31: lower bits are reachable via the sign-extended orq imm32.)
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  // RMW single-bit set: mem |= (1 << k) becomes btsq [mem], k.
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
14128
// Xor Instructions
// Xor Register with Register
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  // Non-APX two-address form: dst doubles as the first input.
  predicate(!UseAPX);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Register using New Data Destination (NDD)
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  // APX NDD three-operand form; XorL is commutative, so either register
  // source may be demoted onto dst.
  predicate(UseAPX);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14159
// Xor Register with Immediate -1
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  // x ^ -1 == ~x: strength-reduce to notq. NOT does not write any flags,
  // hence no rFlagsReg effect here.
  predicate(!UseAPX);
  match(Set dst (XorL dst imm));

  format %{ "notq $dst" %}
  ins_encode %{
    __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
%{
  // APX NDD variant: enotq writes dst without destroying src.
  predicate(UseAPX);
  match(Set dst (XorL src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    __ enotq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
14185
// Xor Register with Immediate
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
  // APX NDD form; only the register source (ideal input 1) is demotable.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  match(Set dst (XorL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Memory operand with Immediate into Register (APX NDD, load folded)
// NOTE(review): sibling rules use an _ndd suffix (cf. andL_rReg_mem_imm_ndd);
// this one does not — consider renaming for consistency.
instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (XorL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14232
// Xor Register with Memory
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  // Non-APX two-address form with a folded load.
  predicate(!UseAPX);
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  // APX NDD three-operand form with a folded load.
  predicate(UseAPX);
  match(Set dst (XorL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14263
// Xor Memory with Register
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  // RMW form: load, xor with register, store back to the same address.
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Xor Memory with Immediate
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  // RMW form with a sign-extended 32-bit immediate.
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14293
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  // dst = (p < q) ? -1 : 0, branch-free: setcc materializes 0/1,
  // negl turns the 1 into all-ones.
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Fixed: the format string was missing the "\n\t" separator between the
  // setcc and negl lines, fusing them in disassembly output.
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14310
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  // Special case (x CmpLTMask 0): an arithmetic shift by 31 broadcasts the
  // sign bit, yielding -1 when dst is negative and 0 otherwise.
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}
14323
/* Better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  // p = ((p < q) ? y : 0) + (p - q), implemented as a conditional add:
  // subtract, then add y back only when the subtraction went negative.
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    // Flags come from the subtraction: p - q >= 0 means p >= q, skip the add.
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}

/* Better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  // y = (p < q) ? y : 0 — keep y when p < q, otherwise zero it.
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    // p < q: mask is all-ones, y is unchanged — jump past the clear.
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}
14371
14372
14373 //---------- FP Instructions------------------------------------------------
14374
14375 // Really expensive, avoid
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  // Full unordered-aware float compare. ucomiss leaves PF set on NaN;
  // emit_cmpfp_fixup rewrites the saved flags so an unordered result
  // presents as "less" (CF set) to the consumer.
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
  // Cheap variant for the rFlagsRegUCF register class: no NaN fixup emitted.
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
  // vucomxss variant for the rFlagsRegUCFE register class.
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "vucomxss $src1, $src2" %}
  ins_encode %{
    __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  // Compare against a float loaded from memory.
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
  // vucomxss variant with a memory operand.
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "vucomxss $src1, $src2" %}
  ins_encode %{
    __ vucomxss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
  // Compare against a float constant materialized in the constant table.
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
  // vucomxss variant against a constant-table float.
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ vucomxss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14459
14460 // Really expensive, avoid
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  // Full unordered-aware double compare; see cmpF_cc_reg for the NaN fixup.
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  // Cheap variant for rFlagsRegUCF: no NaN fixup emitted.
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2 test" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  // vucomxsd variant for the rFlagsRegUCFE register class.
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "vucomxsd $src1, $src2 test" %}
  ins_encode %{
    __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  // Compare against a double loaded from memory.
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
  // vucomxsd variant with a memory operand.
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "vucomxsd $src1, $src2" %}
  ins_encode %{
    __ vucomxsd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
  // Compare against a double constant from the constant table.
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
  // vucomxsd variant against a constant-table double.
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ vucomxsd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14543
// Compare into -1,0,1
// The cmp{F,D}3 instructions implement a three-way compare: emit_cmpfp3()
// expands to the sequence shown in the format string -- preload $dst with -1,
// keep it on unordered (jp) or below (jb), otherwise setne/movzbl yields 0
// for equal and 1 for above. The compare plus fixup clobbers the flags,
// hence effect(KILL cr).
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1; right operand loaded from memory.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1; right operand is a float constant from the constant table.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1 (double flavor of cmpF_reg above).
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1; right operand loaded from memory.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1; right operand is a double constant from the constant table.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14667
//----------Arithmetic Conversion Instructions---------------------------------

// Widen float to double.
instruct convF2D_reg_reg(regD dst, regF src)
%{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Widen float (from memory) to double. Memory form only without AVX.
instruct convF2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Narrow double to float.
instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Narrow double (from memory) to float. Memory form only without AVX.
instruct convD2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14715
// XXX do mem variants
// Float/double -> int/long conversions.
// Without AVX10.2, MacroAssembler::convertF2I emits fixup code for the
// NaN/out-of-range cases and therefore KILLs the flags. With AVX10.2 the
// evcvtt*2sis* forms are used instead and need no flags effect here --
// presumably the AVX10.2 saturating converts handle those cases in hardware;
// confirm against the assembler definitions.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Memory-operand form; exists only for the AVX10.2 path.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisl($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttss2sisq($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisl($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    __ evcvttsd2sisq($dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14852
// Math.round for double: delegates to MacroAssembler::round_double.
// Needs two long temps; rcx is pinned because the helper uses it explicitly.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Math.round for float: delegates to MacroAssembler::round_float.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14874
// Integer/long -> float/double conversions.
// Under AVX the dst register is pxor-cleared before cvtsi2* -- presumably to
// break the dependency on dst's previous upper bits, since the scalar
// converts merge into the destination; confirm against the assembler docs.
instruct convI2F_reg_reg(vlRegF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Memory-operand form; only without AVX.
instruct convI2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2D_reg_reg(vlRegD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Alternative i2f selected by -XX:+UseXmmI2F: move the int into an XMM
// register and use the packed convert on a single lane.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Alternative i2d selected by -XX:+UseXmmI2D (see convXI2F_reg above).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2F_reg_reg(vlRegF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2F_reg_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2D_reg_reg(vlRegD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    if (UseAVX > 0) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2D_reg_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15008
// Sign-extend int to long.
instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq $dst, $src\t# i2l" %}
  ins_encode %{
    __ movslq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Zero-extend convert int to long
// Matches (ConvI2L src) & 0xFFFFFFFF. A 32-bit movl already zeroes the
// upper half, so the move is elided entirely when dst and src are the
// same register.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

// Zero-extend convert int to long
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Mask a long down to its low 32 bits: a 32-bit movl zero-extends.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Truncate long to int: just a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15068
15069
// Raw bit moves between float/double and int/long via a stack slot.
// These implement the Move{F2I,I2F,D2L,L2D} ideal nodes when the source
// has been spilled: the bits are reloaded unchanged into the other
// register file.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    __ movq($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}

// MoveL2D, two variants chosen by -XX:UseXmmLoadAndClearUpper
// (both call movdbl; the flag is resolved inside the assembler).
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}
15131
15132
// Raw bit moves register -> stack slot (store side of the Move* nodes).
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
15156
// Store the raw bits of a double register to a long stack slot (MoveD2L).
// Fix: the format comment previously said "MoveL2D_reg_stack" (copy-paste
// from the sibling instruct), mislabeling this instruction in disassembly
// and debug output; it now matches the instruct name, consistent with the
// other Move* format strings.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15168
// Store the raw bits of a long register to a double stack slot (MoveL2D).
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    __ movq(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15180
// Raw bit moves directly between GPR and XMM register files
// (movdl for 32-bit, movdq for 64-bit); cheaper than the stack variants.
instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_encode %{
    __ movdq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15224
15225 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray: zero $cnt doublewords starting at $base, via
// MacroAssembler::clear_mem. rcx/rdi/rax are pinned because the rep-stos
// based code uses them explicitly (hence the USE_KILL/KILL effects).
// Variant selection: small vs. large (ClearArrayNode::is_large()),
// legacy (UseAVX <= 2, knoreg) vs. AVX512 (UseAVX > 2, mask-register temp),
// plus an immediate-count form for AVX512VL.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

// Small non-constant length ClearArray for AVX512 targets.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Large non-constant length ClearArray for non-AVX512 targets.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

// Large non-constant length ClearArray for AVX512 targets.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add 0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub 0x8,rcx\n\t"
      $$emit$$"jge L_loop\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jl L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add 0x20,rax\n\t"
      $$emit$$"sub 0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add 0x4,rcx\n\t"
      $$emit$$"jle L_end\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq xmm0,(rax)\n\t"
      $$emit$$"add 0x8,rax\n\t"
      $$emit$$"dec rcx\n\t"
      $$emit$$"jge L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Small constant length ClearArray for AVX512 targets.
// Count is a compile-time constant, so registers need not be pinned to the
// rep-stos convention; clear_mem gets the immediate count directly.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15462
// String.compareTo intrinsics. One instruct per encoding pair
// (LL/UU/LU/UL = Latin1/UTF-16 for left/right operand), each in a legacy
// (knoreg) and an AVX512VLBW (extra kReg mask temp) variant, selected by the
// supports_avx512vlbw() predicate. Operand registers are pinned because
// MacroAssembler::string_compare consumes them destructively.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UL: note the swapped register assignment (str1 in rsi, str2 in rdi) and
// the swapped argument order in the call -- the helper is invoked as the
// LU-style compare of (str2, str1).
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15574
15575 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15576 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15577 %{
15578 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15579 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15580 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15581
15582 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15583 ins_encode %{
15584 __ string_compare($str2$$Register, $str1$$Register,
15585 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15586 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15587 %}
15588 ins_pipe( pipe_slow );
15589 %}
15590
// fast search of substring with known size.
// StrIndexOf with a compile-time-constant needle length ($int_cnt2),
// Latin-1 haystack and needle (LL). Needles of >= 16 byte elements take the
// string_indexofC8 path (no stack staging); shorter ones use the general
// string_indexof helper.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// As above but UTF-16 haystack and needle (UU); the C8 threshold drops to 8
// because elements are 2-byte chars.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Mixed encoding (UL): UTF-16 haystack, Latin-1 needle; the helpers expand
// the needle as needed (handled inside the stubs via StrIntrinsicNode::UL).
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15677
// StrIndexOf with a runtime (non-constant) needle length, Latin-1/Latin-1.
// int_cnt2 = -1 tells the stub the length is only known in $cnt2.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length StrIndexOf, UTF-16/UTF-16 (UU).
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Runtime-length StrIndexOf, mixed encoding (UL).
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
15728
// StringUTF16.indexOf(char): find a single UTF-16 char in a char[] slice.
// Three vector temps plus a GP temp; all inputs are clobbered.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// StringLatin1.indexOf(char): same shape as above for a Latin-1 byte[].
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15756
// fast string equals
// StrEquals lowered through arrays_equals with is_array_equ=false (no
// array-header/length handling) and is_char=false (byte-wise compare).
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 vl+bw variant of string_equals; adds the kReg mask temp.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15789
// fast array equals
// Arrays.equals(byte[], byte[]) — arrays_equals with is_array_equ=true, so
// the stub also handles null/length checks; tmp3 receives the length.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 vl+bw variant of array_equalsB.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Arrays.equals(char[], char[]) — as array_equalsB but 2-byte elements
// (is_char=true).
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 vl+bw variant of array_equalsC.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15854
// Vectorized Arrays/String hashCode (31*h + a[i] accumulation) for AVX2+.
// $basic_type selects the element kind; $result is both the incoming initial
// hash and the output (note it appears on both sides of the match rule).
// Thirteen vector temps plus three GP temps; all inputs are consumed.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
                         legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
                         legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
                       $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
                       $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
  %}
  ins_pipe( pipe_slow );
%}
15880
// CountPositives intrinsic over a byte[] slice (StringCoding.countPositives).
// Generic (non-evex) variant, selected when AVX-512 vl+bw or BMI2 is absent;
// passes knoreg twice since no opmask registers are used on this path.
// Fix: removed a stray trailing comma after the last operand (rFlagsReg cr,)
// left in the operand list.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15896
// AVX-512 vl+bw + BMI2 variant of count_positives; supplies two opmask
// register temps to the masked-load implementation.
// Fix: removed a stray trailing comma after the last operand (rFlagsReg cr,)
// left in the operand list.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15912
// fast char[] to byte[] compression
// StrCompressedCopy: narrow UTF-16 chars to Latin-1 bytes; non-evex variant.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 vl+bw + BMI2 variant of string_compress with two opmask temps.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
// fast byte[] to char[] inflation
// StrInflatedCopy: widen Latin-1 bytes to UTF-16 chars. Produces no value
// (Universe dummy result); non-evex variant passes knoreg.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 vl+bw + BMI2 variant of string_inflate with an opmask temp.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15975
// encode char[] to byte[] in ISO_8859_1
// Both EncodeISOArray variants share one stub; the trailing bool selects
// ASCII (true) vs ISO-8859-1 (false) range checking. The predicates on
// is_ascii() make the two rules mutually exclusive.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
16009
16010 //----------Overflow Math Instructions-----------------------------------------
16011
// OverflowAddI: perform the add purely to set the condition codes (OF);
// the sum in op1 is discarded, hence USE_KILL on op1. Only the flags
// register is defined by this rule.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// OverflowAddI with an immediate right operand.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// OverflowAddL: 64-bit form of the flags-only add above.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// OverflowAddL with a 32-bit sign-extended immediate right operand.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16061
// OverflowSubI: a subtract for flags only can use cmp, which sets the same
// condition codes without clobbering op1 — so no USE_KILL is needed here.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// OverflowSubI with an immediate right operand.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// OverflowSubL: 64-bit flags-only subtract via cmpq.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// OverflowSubL with a 32-bit sign-extended immediate right operand.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// OverflowSubI of (0 - op2), i.e. overflow-checked negate; neg is a
// destructive one-operand instruction, hence USE_KILL on op2.
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 64-bit overflow-checked negate.
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
16129
// OverflowMulI: two-operand imul destroys op1 (USE_KILL); only the flags
// result (OF) is consumed by the matcher.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// OverflowMulI with an immediate: the three-operand imul form writes a
// scratch register ($tmp), leaving both inputs intact.
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// OverflowMulL: 64-bit two-operand imul, destroys op1.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// OverflowMulL with a 32-bit sign-extended immediate, via three-operand imul.
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16177
16178
16179 //----------Control Flow Instructions------------------------------------------
16180 // Signed compare Instructions
16181
16182 // XXX more variants!!
// Signed 32-bit compare of two registers.
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Signed 32-bit compare, register vs immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Signed 32-bit compare with a memory operand (folds the LoadI).
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// CmpI against zero: testl reg,reg sets the same flags with a shorter
// encoding than cmpl reg,0.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// (src & con) compared to zero, folded into a single testl — no result
// register is needed for the AND.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// (src1 & src2) compared to zero, folded into testl.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    __ testl($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// (src & mem) compared to zero, folding the load into testl.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16261
16262 // Unsigned compare Instructions; really, same as signed except they
16263 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned 32-bit compare: same cmpl encoding as the signed rule, but the
// rFlagsRegU result steers branches to the unsigned condition codes.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned 32-bit compare, register vs immediate.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Unsigned 32-bit compare with a memory operand (folds the LoadI).
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Unsigned compare against zero via testl (shorter encoding).
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16308
// Pointer compare; produces unsigned flags (pointer comparison is unsigned).
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Pointer compare with the second operand loaded from memory.
// Only matches loads with no GC barrier data attached.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The reloc() == none predicate restricts this to non-relocatable (raw)
// pointer constants; barrier_data() == 0 excludes GC-barriered loads.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
// testq with imm32 0xFFFFFFFF sign-extends to all-ones 64-bit, so the AND
// leaves the loaded pointer value and ZF is set iff it is null.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Null check of an in-memory pointer when r12 holds zero
// (UseCompressedOops with a null heap base keeps R12_heapbase == 0),
// so a register-vs-memory compare against r12 is a compare against null.
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16391
// Compressed (narrow) oop compares. Narrow oops are 32-bit values,
// so 32-bit cmpl/testl forms are used; flags are unsigned.
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Narrow oop compare with the second operand loaded from memory;
// only matches loads with no GC barrier data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Narrow oop compare against a narrow-oop constant; cmp_narrow_oop
// emits the embedded oop immediate (with its relocation).
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Narrow oop constant compared directly against a memory operand.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Narrow klass pointer compared against a narrow-klass constant.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Narrow klass constant compared against an in-memory klass field;
// not applicable with compact object headers (different header layout).
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  predicate(!UseCompactObjectHeaders);
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Narrow oop null check in a register: test against itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16464
// Null check of an in-memory narrow oop when the heap base is non-null.
// testl with all-ones imm32 ANDs the 32-bit memory operand with 0xffffffff,
// so ZF is set iff the narrow oop is null (0) -- matching the format string
// and the 64-bit analog testP_mem above. The previous encoding used
// cmpl($mem, 0xFFFFFFFF), which sets ZF only for the impossible narrow
// value 0xffffffff and never for null.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16478
// Null check of an in-memory narrow oop when the heap base is null:
// r12 (R12_heapbase) is known to hold zero, so comparing r12 with the
// memory operand is a compare against narrow null.
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16491
16492 // Yanked all unsigned pointer compare operations.
16493 // Pointer compares are done with CmpP which is already unsigned.
16494
// Signed 64-bit compare, register-register.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Signed 64-bit compare against a 32-bit immediate (sign-extended by hw);
// immL32 restricts the constant to the imm32 encodable range.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Signed 64-bit compare with the second operand loaded from memory.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// 64-bit zero test: test the register against itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// 64-bit bit test against an imm32 mask without keeping the AND result.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// 64-bit bit test with the mask in a register.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
%{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{
    __ testq($src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// 64-bit bit test folding the load of the mask operand.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Same, where the long value is a pointer viewed as raw bits (CastP2X);
// the pointer register can be tested directly.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16582
16583 // Manifest a CmpU result in an integer register. Very painful.
16584 // This is the test to avoid.
// Manifest a CmpU3 result in an integer register: dst = -1 if src1 <u src2,
// 0 if equal, 1 if src1 >u src2. Very painful; this is the test to avoid.
// (Format comment previously said "# CmpL3" -- a copy-paste from
// cmpL3_reg_reg; the node matched here is CmpU3, hence unsigned "below".)
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);      // assume below: -1
    __ jccb(Assembler::below, done);
    // Not below: setne gives 0 on equality, 1 on above.
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16606
16607 // Manifest a CmpL result in an integer register. Very painful.
16608 // This is the test to avoid.
// Manifest a CmpL3 result in an integer register: dst = -1 if src1 < src2,
// 0 if equal, 1 if src1 > src2 (signed). Very painful; the test to avoid.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);      // assume less: -1
    __ jccb(Assembler::less, done);
    // Not less: setne gives 0 on equality, 1 on greater.
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16630
16631 // Manifest a CmpUL result in an integer register. Very painful.
16632 // This is the test to avoid.
// Manifest a CmpUL3 result in an integer register: dst = -1 if src1 <u src2,
// 0 if equal, 1 if src1 >u src2. Very painful; this is the test to avoid.
// (Format comment previously said "# CmpL3" -- a copy-paste from
// cmpL3_reg_reg; the node matched here is CmpUL3, hence unsigned "below".)
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);      // assume below: -1
    __ jccb(Assembler::below, done);
    // Not below: setne gives 0 on equality, 1 on above.
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16654
16655 // Unsigned long compare Instructions; really, same as signed long except they
16656 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register-register; produces unsigned flags.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare against an imm32 constant.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// Unsigned long compare with the second operand loaded from memory.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Unsigned long zero test: test the register against itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16700
// Byte compare of a memory operand against an 8-bit immediate
// (folds LoadB + CmpI into a single cmpb).
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Bit test of an in-memory unsigned byte against a 7-bit mask
// (immU7 keeps the mask within the zero-extended ubyte range).
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Bit test of an in-memory signed byte against an 8-bit mask.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16730
16731 //----------Max and Min--------------------------------------------------------
16732 // Min Instructions
16733
// Helper with no match rule: conditional move used only by the minI_rReg
// expand below (dst = src when flags say "greater").
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX NDD (new data destination) three-operand form of the helper above;
// used only by the minI_rReg_ndd expand.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// MinI: compare, then cmov src into dst when dst > src.
instruct minI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}

// MinI, APX NDD variant with a separate destination register.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}

// Helper with no match rule: conditional move used only by the maxI_rReg
// expand below (dst = src when flags say "less").
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// APX NDD three-operand form; used only by the maxI_rReg_ndd expand.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{
    __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// MaxI: compare, then cmov src into dst when dst < src.
instruct maxI_rReg(rRegI dst, rRegI src)
%{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}

// MaxI, APX NDD variant with a separate destination register.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
16837
16838 // ============================================================================
16839 // Branch Instructions
16840
16841 // Jump Direct - Label defines a relative address from JMP+1
// Jump Direct - Label defines a relative address from JMP+1
// size(5): E9 + rel32 (always emitted as a long jump).
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// size(6): 0F 8x + rel32 (always long form).
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump on flags from an unordered (float) compare where the
// condition code itself already handles the unordered case.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump where the unordered case (PF set) needs explicit
// handling: for ne, unordered also takes the branch; for eq, unordered
// must fall through, so parity skips over the equality jump.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using signed and unsigned comparison
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16964
16965 // ============================================================================
16966 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16967 // superklass array for an instance of the superklass. Set a hidden
16968 // internal cache on a hit (cache is checked with exposed code in
16969 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16970 // encoding ALSO sets flags.
16971
// Linear-scan slow path of the subtype check (used when the secondary
// supers hash table is disabled). Operands are pinned to the registers
// the stub/assembler contract requires (rsi=sub, rax=super, rdi=result,
// rcx=scan counter). Result is zero on a hit, non-zero on a miss; the
// encoding also sets condition codes.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100); // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Hit path: zero the result register; miss path skips this.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
17005
17006 // ============================================================================
17007 // Two versions of hashtable-based partialSubtypeCheck, both used when
17008 // we need to search for a super class in the secondary supers array.
17009 // The first is used when we don't know _a priori_ the class being
17010 // searched for. The second, far more common, is used when we do know:
17011 // this is used for instanceof, checkcast, and any case where C2 can
17012 // determine it by constant propagation.
17013
// Hash-table subtype check for a super class not known at compile time;
// delegates to the macro-assembler's variable-super lookup.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(1000);
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register, $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17032
// Hash-table subtype check for a constant super class (the common case:
// instanceof/checkcast). The super's hash slot is known at compile time,
// so the lookup is either inlined or dispatched to a per-slot stub.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(700); // smaller than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}

  ins_encode %{
    u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (InlineSecondarySupersTest) {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register, $result$$Register,
                                             super_klass_slot);
    } else {
      // Out-of-line: call the pre-generated stub for this hash slot.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
    }
  %}

  ins_pipe(pipe_slow);
%}
17057
17058 // ============================================================================
17059 // Branch Instructions -- short offset versions
17060 //
17061 // These instructions are used to replace jumps of a long offset (the default
17062 // match) with jumps of a shorter offset. These instructions are all tagged
17063 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17064 // match rules in general matching. Instead, the ADLC generates a conversion
17065 // method in the MachNode which can be used to do in-place replacement of the
17066 // long variant with the shorter variant. The compiler will determine if a
17067 // branch can be taken by the is_short_branch_offset() predicate in the machine
17068 // specific code section of the file.
17069
17070 // Jump Direct - Label defines a relative address from JMP+1
// Jump Direct - Label defines a relative address from JMP+1
// Short (rel8) form; ins_short_branch(1) marks it as the in-place
// replacement for jmpDir when the offset fits. size(2): EB + rel8.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpCon. size(2): 7x + rel8.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of jmpLoopEnd.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Short form of jmpConUCF.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Short form of jmpConUCF2: same parity (unordered) handling, but all
// branches are rel8. size(4): two 2-byte jccb's on either path.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using signed and unsigned comparison
// Short form of jmpConUCFE.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,sus $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17198
17199 // ============================================================================
17200 // inlined locking and unlocking
17201
// Inlined monitor enter. Flags are the result (zero on success); box is
// pinned to rbx and destroyed, rax and tmp are scratch.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}

// Inlined monitor exit; the box input is pinned to rax and destroyed.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17223
17224
17225 // ============================================================================
17226 // Safepoint Instructions
// Safepoint poll: read through the thread-local polling page address held
// in $poll. The load faults when the page is armed, trapping into the VM.
// A poll_type relocation is recorded at the instruction so the signal
// handler can recognize it (asserted below via is_safepoint_poll()).
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17243
// Broadcast a long's low bits into an opmask register for the vector's
// lane count (MaskAll node).
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll from an int source when more than 32 mask bits are needed:
// sign-extend the int into a temp long register first so all mask bits
// are populated.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
17266
17267 // ============================================================================
17268 // Procedure Call/Return Instructions
17269 // Call Java Static Instruction
17270 // Note: If this code changes, the corresponding ret_addr_offset() and
17271 // compute_padding() functions will have to be adjusted.
17272 instruct CallStaticJavaDirect(method meth) %{
17273 match(CallStaticJava);
17274 effect(USE meth);
17275
17276 ins_cost(300);
17277 format %{ "call,static " %}
17278 opcode(0xE8); /* E8 cd */
17279 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17280 ins_pipe(pipe_slow);
17281 ins_alignment(4);
17282 %}
17283
17284 // Call Java Dynamic Instruction
17285 // Note: If this code changes, the corresponding ret_addr_offset() and
17286 // compute_padding() functions will have to be adjusted.
17287 instruct CallDynamicJavaDirect(method meth)
17288 %{
17289 match(CallDynamicJava);
17290 effect(USE meth);
17291
17292 ins_cost(300);
17293 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17294 "call,dynamic " %}
17295 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17296 ins_pipe(pipe_slow);
17297 ins_alignment(4);
17298 %}
17299
17300 // Call Runtime Instruction
17301 instruct CallRuntimeDirect(method meth)
17302 %{
17303 match(CallRuntime);
17304 effect(USE meth);
17305
17306 ins_cost(300);
17307 format %{ "call,runtime " %}
17308 ins_encode(clear_avx, Java_To_Runtime(meth));
17309 ins_pipe(pipe_slow);
17310 %}
17311
17312 // Call runtime without safepoint
17313 instruct CallLeafDirect(method meth)
17314 %{
17315 match(CallLeaf);
17316 effect(USE meth);
17317
17318 ins_cost(300);
17319 format %{ "call_leaf,runtime " %}
17320 ins_encode(clear_avx, Java_To_Runtime(meth));
17321 ins_pipe(pipe_slow);
17322 %}
17323
17324 // Call runtime without safepoint and with vector arguments
17325 instruct CallLeafDirectVector(method meth)
17326 %{
17327 match(CallLeafVector);
17328 effect(USE meth);
17329
17330 ins_cost(300);
17331 format %{ "call_leaf,vector " %}
17332 ins_encode(Java_To_Runtime(meth));
17333 ins_pipe(pipe_slow);
17334 %}
17335
17336 // Call runtime without safepoint
17337 instruct CallLeafNoFPDirect(method meth)
17338 %{
17339 match(CallLeafNoFP);
17340 effect(USE meth);
17341
17342 ins_cost(300);
17343 format %{ "call_leaf_nofp,runtime " %}
17344 ins_encode(clear_avx, Java_To_Runtime(meth));
17345 ins_pipe(pipe_slow);
17346 %}
17347
// Return Instruction
// Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    // ret(0): pop the return address and jump to it; no stack-argument
    // bytes are released by the callee in this calling convention.
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
17362
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall which has reset rbp to the caller state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  // The callee expects its Method* in rbx (fixed by the rbx_RegP operand).
  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}

// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_encode %{
    // The popped return address lands in rdx; the exception oop is pinned
    // in rax by the rax_RegP operand.  Both are inputs to the target stub.
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}

// Forward exception.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp     forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17408
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  // Zero-size: this rule only tells the register allocator that the
  // exception oop materializes in rax at handler entry.
  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}

// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17437
17438 // ============================================================================
17439 // This name is KNOWN by the ADLC and cannot be changed.
17440 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17441 // for this guy.
17442 instruct tlsLoadP(r15_RegP dst) %{
17443 match(Set dst (ThreadLocal));
17444 effect(DEF dst);
17445
17446 size(0);
17447 format %{ "# TLS is in R15" %}
17448 ins_encode( /*empty encoding*/ );
17449 ins_pipe(ialu_reg_reg);
17450 %}
17451
// ---------------- Scalar float add ----------------
// SSE (UseAVX == 0) forms are two-operand and destructive: dst must also be
// the first input, hence the "Set dst (AddF dst ...)" match shapes.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE add with the second operand folded from memory.
instruct addF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE add against a float constant materialized in the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX (VEX-encoded) forms are three-operand and non-destructive, so the
// inputs need not alias dst.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX add with the second operand folded from memory.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX add against a float constant from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17522
// ---------------- Scalar double add ----------------
// Same SSE-destructive / AVX-three-operand split as the float rules above.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE add with the second operand folded from memory.
instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE add against a double constant from the constant table.
instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand register form.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX add with the second operand folded from memory.
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX add against a double constant from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17593
// ---------------- Scalar float subtract ----------------
// SSE forms are destructive (dst is also the left input); AVX forms are
// three-operand.  Note SubF is not commutative, so operand order matters.
instruct subF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE subtract with the subtrahend folded from memory.
instruct subF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE subtract of a float constant from the constant table.
instruct subF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand register form.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX subtract with the subtrahend folded from memory.
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX subtract of a float constant from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17664
// ---------------- Scalar double subtract ----------------
// Same SSE/AVX split as the float subtract rules; SubD is non-commutative.
instruct subD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE subtract with the subtrahend folded from memory.
instruct subD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE subtract of a double constant from the constant table.
instruct subD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand register form.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX subtract with the subtrahend folded from memory.
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX subtract of a double constant from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17735
// ---------------- Scalar float multiply ----------------
// SSE (destructive) vs AVX (three-operand) split, as with add/sub above.
instruct mulF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE multiply with the second operand folded from memory.
instruct mulF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE multiply by a float constant from the constant table.
instruct mulF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand register form.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX multiply with the second operand folded from memory.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX multiply by a float constant from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17806
// ---------------- Scalar double multiply ----------------
// Same SSE/AVX split as the float multiply rules.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE multiply with the second operand folded from memory.
instruct mulD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE multiply by a double constant from the constant table.
instruct mulD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand register form.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX multiply with the second operand folded from memory.
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX multiply by a double constant from the constant table.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17877
// ---------------- Scalar float divide ----------------
// SSE forms are destructive (dst is also the dividend); AVX forms are
// three-operand.  DivF is non-commutative.
instruct divF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE divide with the divisor folded from memory.
instruct divF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE divide by a float constant from the constant table.
instruct divF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand register form.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX divide with the divisor folded from memory.
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX divide by a float constant from the constant table.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17948
// ---------------- Scalar double divide ----------------
// Same SSE/AVX split as the float divide rules; DivD is non-commutative.
instruct divD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// SSE divide with the divisor folded from memory.
instruct divD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// SSE divide by a double constant from the constant table.
instruct divD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// AVX three-operand register form.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// AVX divide with the divisor folded from memory.
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// AVX divide by a double constant from the constant table.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18019
// ---------------- Scalar float/double absolute value ----------------
// |x| is computed by AND-ing away the sign bit with a mask from a
// well-known constant (0x7fffffff / 0x7fffffffffffffff).
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX form: non-destructive, AVX_128bit is sufficient for a scalar lane.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// SSE double abs: clear the sign bit of the low 64-bit lane.
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double abs: non-destructive three-operand form.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
18069
// ---------------- Scalar float/double negation ----------------
// -x is computed by XOR-ing the sign bit with a mask from a well-known
// constant (0x80000000 / 0x8000000000000000).
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX form: vnegatess is a MacroAssembler helper that flips only the sign
// of the scalar lane, leaving the upper bits as defined by the helper.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// SSE double negate: flip the sign bit of the low 64-bit lane.
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX double negate via the vnegatesd MacroAssembler helper.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18117
// sqrtss instruction needs destination register to be pre initialized for best performance
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
// (sqrtss only writes the low lane; using dst as both operands avoids a
// partial-register dependency on a stale upper half.)
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// sqrtsd instruction needs destination register to be pre initialized for best performance
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18139
// ---------------- float <-> half-float (FP16) conversions ----------------

// Scalar float -> half-float: result is a 16-bit pattern returned in a GPR.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar float -> half-float stored directly to memory (as a char/short).
// Requires AVX512VL for the masked evcvtps2ph form.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    // Mask 0x1 selects only lane 0 so exactly one 16-bit result is stored.
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    // 0x04 = round-to-nearest-even rounding-control immediate.
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

// Vector float -> half-float, register to register.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector float -> half-float stored straight to memory.  Restricted to
// results of at least 16 bytes so vcvtps2ph's memory form is legal.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar half-float (in a GPR) -> float.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Vector half-float -> float with the source folded from memory.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector half-float -> float, register to register.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18214
18215 // ---------------------------------------- VectorReinterpret ------------------------------------
18216 instruct reinterpret_mask(kReg dst) %{
18217 predicate(n->bottom_type()->isa_vectmask() &&
18218 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18219 match(Set dst (VectorReinterpret dst));
18220 ins_cost(125);
18221 format %{ "vector_reinterpret $dst\t!" %}
18222 ins_encode %{
18223 // empty
18224 %}
18225 ins_pipe( pipe_slow );
18226 %}
18227
18228 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18229 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18230 n->bottom_type()->isa_vectmask() &&
18231 n->in(1)->bottom_type()->isa_vectmask() &&
18232 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
18233 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18234 match(Set dst (VectorReinterpret src));
18235 effect(TEMP xtmp);
18236 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18237 ins_encode %{
18238 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18239 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18240 assert(src_sz == dst_sz , "src and dst size mismatch");
18241 int vlen_enc = vector_length_encoding(src_sz);
18242 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18243 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18244 %}
18245 ins_pipe( pipe_slow );
18246 %}
18247
18248 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18249 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18250 n->bottom_type()->isa_vectmask() &&
18251 n->in(1)->bottom_type()->isa_vectmask() &&
18252 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18253 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
18254 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18255 match(Set dst (VectorReinterpret src));
18256 effect(TEMP xtmp);
18257 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18258 ins_encode %{
18259 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18260 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18261 assert(src_sz == dst_sz , "src and dst size mismatch");
18262 int vlen_enc = vector_length_encoding(src_sz);
18263 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18264 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18265 %}
18266 ins_pipe( pipe_slow );
18267 %}
18268
18269 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18270 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18271 n->bottom_type()->isa_vectmask() &&
18272 n->in(1)->bottom_type()->isa_vectmask() &&
18273 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18274 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
18275 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
18276 match(Set dst (VectorReinterpret src));
18277 effect(TEMP xtmp);
18278 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18279 ins_encode %{
18280 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18281 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18282 assert(src_sz == dst_sz , "src and dst size mismatch");
18283 int vlen_enc = vector_length_encoding(src_sz);
18284 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18285 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18286 %}
18287 ins_pipe( pipe_slow );
18288 %}
18289
18290 instruct reinterpret(vec dst) %{
18291 predicate(!n->bottom_type()->isa_vectmask() &&
18292 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18293 match(Set dst (VectorReinterpret dst));
18294 ins_cost(125);
18295 format %{ "vector_reinterpret $dst\t!" %}
18296 ins_encode %{
18297 // empty
18298 %}
18299 ins_pipe( pipe_slow );
18300 %}
18301
// Reinterpret to a wider vector on pure SSE (UseAVX == 0): load a constant
// mask that keeps only the source's low 4 or 8 bytes and AND it with src,
// so the widened destination reads as src zero-extended.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    // SSE-only path: at most a 16-byte destination and an 8-byte source.
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes == 4) {
      // Mask selecting the low 32 bits.
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
    } else {
      assert(src_vlen_in_bytes == 8, "");
      // Mask selecting the low 64 bits.
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
    }
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18324
// AVX widening reinterpret of a 4-byte source: AND against a constant that
// keeps only the low 32 bits, zeroing the rest of the destination.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}


// AVX widening reinterpret of an 8/16/32-byte source: copy with the move
// instruction matching the source width; the VEX-encoded moves leave the
// bytes beyond the copied width zeroed.
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    // Dispatch on the source size in bytes.
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18358
// Narrowing reinterpret (non-opmask): copy only the destination's width
// worth of low bytes from the (wider) source.
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_encode %{
    // Dispatch on the destination size in bytes.
    switch (Matcher::vector_length_in_bytes(this)) {
      case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18376
18377 // ----------------------------------------------------------------------------------------------------
18378
// Scalar double rounding with an immediate rounding mode (SSE4.1 roundsd).
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
      // Legacy-SSE roundsd writes only the low 64 bits of dst; clear dst
      // first — presumably to break the dependence on its stale upper bits.
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Round a double constant: the value is placed in the constant table and
// rounded directly from memory.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18403
// Packed double rounding, fewer than 8 elements: VEX-encoded vroundpd.
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Packed double rounding, 8 elements (512-bit): EVEX vrndscalepd.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vroundD_reg (load folded into the round).
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vround8D_reg (load folded into the round).
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18449
// Spin-wait hint: emit a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18463
// Scalar double fused multiply-add: c = a * b + c.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar float fused multiply-add: c = a * b + c.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18487
18488 // ====================VECTOR INSTRUCTIONS=====================================
18489
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup,
// so reaching their encodings at runtime is a bug (ShouldNotReachHere).
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18508
18509 // ============================================================================
18510
// Load vectors generic operand pattern: the macro assembler picks the
// load instruction from the element type and total byte length.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors generic operand pattern: select the plain store matching
// the vector's size in bytes.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18540
18541 // ---------------------------------------- Gather ------------------------------------
18542
18543 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18544
// Unmasked gather, AVX2 path: no AVX512VL, non-subword elements, <= 32-byte
// vectors. Builds an all-ones vector mask (every lane enabled) since the
// vgather instruction consumes and clears mask lanes as it completes them.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // x == x comparison yields all-ones: enable every lane.
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    // Materialize the base address in a GPR for the gather addressing mode.
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// Unmasked gather, AVX-512 path (AVX512VL or full 512-bit vectors).
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // k XNOR k == all ones: enable every lane of the gather.
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked gather, AVX-512 path. dst is zeroed first so lanes whose mask bit
// is clear read as zero.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18599
// Subword (byte/short) gather for vectors of at most 8 bytes: a single
// vgather8b helper call assembles the result.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Subword gather for vectors larger than 8 bytes: vgather_subword assembles
// the result piecewise. idx_base is copied into idx_base_temp so the helper
// can consume the index pointer without clobbering the matched input.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    // Unmasked variant: mask register and mask-index arguments are noreg.
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18631
// Masked subword gather (<= 8-byte vectors), AVX512BW path: the opmask is
// transferred to a GPR bitmask (kmovql) which the helper consumes;
// mask_idx starts at zero as the helper's running mask-bit index.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    // Opmask -> GPR bitmask for the scalar gather loop.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked subword gather (> 8-byte vectors), AVX512BW path: same mask
// handling as the LE8B variant; idx_base is copied so the helper can
// advance through the indices without clobbering the matched input.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    // Opmask -> GPR bitmask for the scalar gather loop.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18667
// Masked subword gather (<= 8-byte vectors), AVX2 path: the vector mask is
// converted to a GPR bitmask with vpmovmskb (one bit per byte); for shorts,
// pextl with 0x55555555 keeps every other bit so the bitmask has one bit
// per element.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      // Compress the two-bits-per-short byte mask to one bit per element.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // mask_idx doubled as a scratch above; reset it to zero before the call.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked subword gather (> 8-byte vectors), AVX2 path: same mask conversion
// as the LE8B variant; idx_base is copied so the helper can advance through
// the indices without clobbering the matched input.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      // Compress the two-bits-per-short byte mask to one bit per element.
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // mask_idx doubled as a scratch above; reset it to zero before the call.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18711
18712 // ====================Scatter=======================================
18713
18714 // Scatter INT, LONG, FLOAT, DOUBLE
18715
// Unmasked scatter (AVX-512): loads an all-ones opmask from the constant
// table since evscatter consumes/clears mask bits as lanes complete.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked scatter (AVX-512).
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, hence the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18752
18753 // ====================REPLICATE=======================================
18754
// Replicate byte scalar to be vector.
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (UseAVX >= 2) {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        // Broadcast straight from the GPR.
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        // AVX2: move to xmm first, then broadcast xmm-to-xmm.
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      // SSE fallback: duplicate the byte into a word (punpcklbw), broadcast
      // that word across the low 64 bits (pshuflw), then, for 16-byte
      // vectors, duplicate the low quadword into the high one.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate a byte loaded from memory (AVX2+ broadcast from memory operand).
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18795
18796 // ====================ReplicateS=======================================
18797
// Replicate short scalar to be vector.
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (UseAVX >= 2) {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        // Broadcast straight from the GPR.
        __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        // AVX2: move to xmm first, then broadcast xmm-to-xmm.
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      // SSE fallback: broadcast the word across the low 64 bits, then, for
      // 8-element vectors, duplicate the low quadword into the high one.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 8) {
        assert(vlen == 8, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate a half-float immediate (AVX512-FP16): materialize the 16-bit
// pattern in a GPR and broadcast it.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
    __ movl($rtmp$$Register, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate a half-float register value (AVX512-FP16): extract the 16-bit
// value to a GPR with vmovw, then broadcast it.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmovw($rtmp$$Register, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate a short loaded from memory (AVX2+ broadcast from memory operand).
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18863
18864 // ====================ReplicateI=======================================
18865
// Replicate int scalar to be vector.
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      // EVEX broadcast straight from the GPR.
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      // SSE fallback: broadcast via pshufd with an all-zero shuffle control.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate an int loaded from memory.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_avx()) {
      // AVX1 has no integer broadcast; the float broadcast moves the same bits.
      __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate an integral (non-long) immediate: a replicated constant is
// emitted into the constant table and loaded/broadcast from there. The
// element count stored depends on which broadcast width is available.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                           type2aelembytes(Matcher::vector_element_basic_type(this))));
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to be vector: the xor-with-self zeroing idiom.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      // EVEX without VL: use the vector-length-encoded vpxor form.
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate all-ones (-1) to be vector.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
18945
18946 // ====================ReplicateL=======================================
18947
// Replicate long (8 byte) scalar to be vector.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      // EVEX broadcast straight from the 64-bit GPR.
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      // SSE fallback: duplicate the low quadword into the high one.
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate a long loaded from memory.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      // SSE3 movddup loads and duplicates the quadword in one instruction.
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    // One stored copy suffices with SSE3 (movddup); otherwise store two.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long zero to be vector: the xor-with-self zeroing idiom.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      // EVEX without VL: use the vector-length-encoded vpxor form.
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long all-ones (-1) to be vector.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
19025
19026 // ====================ReplicateF=======================================
19027
// AVX replicate-float from register. <=128-bit vectors use an in-lane
// vpermilps shuffle; AVX2 can broadcast reg-to-reg; plain AVX (256-bit)
// shuffles the low lane and mirrors it into the high 128 bits.
19028 instruct vReplF_reg(vec dst, vlRegF src) %{
19029 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19030 match(Set dst (Replicate src));
19031 format %{ "replicateF $dst,$src" %}
19032 ins_encode %{
19033 uint vlen = Matcher::vector_length(this);
19034 int vlen_enc = vector_length_encoding(this);
19035 if (vlen <= 4) {
19036 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19037 } else if (VM_Version::supports_avx2()) {
19038 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19039 } else {
19040 assert(vlen == 8, "sanity");
19041 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19042 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19043 }
19044 %}
19045 ins_pipe( pipe_slow );
19046 %}
19047
// Pure SSE replicate-float: pshufd with an all-zero selector copies lane 0
// into all four lanes.
19048 instruct ReplF_reg(vec dst, vlRegF src) %{
19049 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19050 match(Set dst (Replicate src));
19051 format %{ "replicateF $dst,$src" %}
19052 ins_encode %{
19053 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19054 %}
19055 ins_pipe( pipe_slow );
19056 %}
19057
// AVX replicate-float from memory: vbroadcastss supports a memory source on
// all AVX levels, so no feature dispatch is needed.
19058 instruct ReplF_mem(vec dst, memory mem) %{
19059 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19060 match(Set dst (Replicate (LoadF mem)));
19061 format %{ "replicateF $dst,$mem" %}
19062 ins_encode %{
19063 int vlen_enc = vector_length_encoding(this);
19064 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19065 %}
19066 ins_pipe( pipe_slow );
19067 %}
19068
19069 // Replicate float scalar immediate to be vector by loading from const table.
// Table holds 1 copy (AVX can broadcast), 2 (SSE3 movddup) or 4 (SSE2 full row).
19070 instruct ReplF_imm(vec dst, immF con) %{
19071 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19072 match(Set dst (Replicate con));
19073 format %{ "replicateF $dst,$con" %}
19074 ins_encode %{
19075 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19076 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19077 int vlen = Matcher::vector_length_in_bytes(this);
19078 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19079 %}
19080 ins_pipe( pipe_slow );
19081 %}
19082
// Replicate float zero via self-xor; EVEX without AVX512VL+DQ needs the
// length-encoded vpxor form instead of legacy xorps.
19083 instruct ReplF_zero(vec dst, immF0 zero) %{
19084 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19085 match(Set dst (Replicate zero));
19086 format %{ "replicateF $dst,$zero" %}
19087 ins_encode %{
19088 int vlen_enc = vector_length_encoding(this);
19089 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19090 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19091 } else {
19092 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19093 }
19094 %}
19095 ins_pipe( fpu_reg_reg );
19096 %}
19097
19098 // ====================ReplicateD=======================================
19099
19100 // Replicate double (8 bytes) scalar to be vector
// 128-bit vectors use movddup; AVX2 broadcasts reg-to-reg; plain AVX (vlen 4)
// duplicates into the low lane and mirrors it into the high 128 bits.
19101 instruct vReplD_reg(vec dst, vlRegD src) %{
19102 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19103 match(Set dst (Replicate src));
19104 format %{ "replicateD $dst,$src" %}
19105 ins_encode %{
19106 uint vlen = Matcher::vector_length(this);
19107 int vlen_enc = vector_length_encoding(this);
19108 if (vlen <= 2) {
19109 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19110 } else if (VM_Version::supports_avx2()) {
19111 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19112 } else {
19113 assert(vlen == 4, "sanity");
19114 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19115 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19116 }
19117 %}
19118 ins_pipe( pipe_slow );
19119 %}
19120
// Pre-SSE3 replicate-double: pshufd selector 0x44 copies the low 64-bit
// element into both halves of the 128-bit register.
19121 instruct ReplD_reg(vec dst, vlRegD src) %{
19122 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19123 match(Set dst (Replicate src));
19124 format %{ "replicateD $dst,$src" %}
19125 ins_encode %{
19126 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19127 %}
19128 ins_pipe( pipe_slow );
19129 %}
19130
// Replicate double from memory: >=256-bit vectors use vbroadcastsd (memory
// source allowed on AVX); 128-bit vectors use movddup.
19131 instruct ReplD_mem(vec dst, memory mem) %{
19132 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19133 match(Set dst (Replicate (LoadD mem)));
19134 format %{ "replicateD $dst,$mem" %}
19135 ins_encode %{
19136 if (Matcher::vector_length(this) >= 4) {
19137 int vlen_enc = vector_length_encoding(this);
19138 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19139 } else {
19140 __ movddup($dst$$XMMRegister, $mem$$Address);
19141 }
19142 %}
19143 ins_pipe( pipe_slow );
19144 %}
19145
19146 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// Table entry has 1 copy when SSE3 movddup can duplicate it, otherwise 2.
19147 instruct ReplD_imm(vec dst, immD con) %{
19148 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19149 match(Set dst (Replicate con));
19150 format %{ "replicateD $dst,$con" %}
19151 ins_encode %{
19152 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19153 int vlen = Matcher::vector_length_in_bytes(this);
19154 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19155 %}
19156 ins_pipe( pipe_slow );
19157 %}
19158
// Replicate double zero via self-xor; EVEX without AVX512VL+DQ needs the
// length-encoded vpxor form instead of legacy xorps.
19159 instruct ReplD_zero(vec dst, immD0 zero) %{
19160 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19161 match(Set dst (Replicate zero));
19162 format %{ "replicateD $dst,$zero" %}
19163 ins_encode %{
19164 int vlen_enc = vector_length_encoding(this);
19165 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19166 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19167 } else {
19168 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19169 }
19170 %}
19171 ins_pipe( fpu_reg_reg );
19172 %}
19173
19174 // ====================VECTOR INSERT=======================================
19175
// Insert an integral GPR scalar into one lane of a vector of at most 128 bits
// (plus 64-bit sub-vectors). Destination doubles as source, so the insert is
// done in place with the SSE4.1 pinsr family via MacroAssembler::insert.
19176 instruct insert(vec dst, rRegI val, immU8 idx) %{
19177 predicate(Matcher::vector_length_in_bytes(n) < 32);
19178 match(Set dst (VectorInsert (Binary dst val) idx));
19179 format %{ "vector_insert $dst,$val,$idx" %}
19180 ins_encode %{
19181 assert(UseSSE >= 4, "required");
19182 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19183
19184 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19185
19186 assert(is_integral_type(elem_bt), "");
19187 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19188
19189 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19190 %}
19191 ins_pipe( pipe_slow );
19192 %}
19193
// Insert an integral GPR scalar into one lane of a 256-bit vector.
// The 256-bit vector is treated as two 128-bit lanes: extract the lane that
// contains the target element into vtmp, insert the scalar there, then write
// the lane back into dst. x_idx is the slot within the lane, y_idx the lane.
19194 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19195 predicate(Matcher::vector_length_in_bytes(n) == 32);
19196 match(Set dst (VectorInsert (Binary src val) idx));
19197 effect(TEMP vtmp);
19198 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19199 ins_encode %{
19201 BasicType elem_bt = Matcher::vector_element_basic_type(this);
// Elements per 128-bit lane and its log2, used to split idx into (lane, slot).
19202 int elem_per_lane = 16/type2aelembytes(elem_bt);
19203 int log2epr = log2(elem_per_lane);
19204
19205 assert(is_integral_type(elem_bt), "sanity");
19206 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19207
19208 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19209 uint y_idx = ($idx$$constant >> log2epr) & 1;
19210 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19211 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19212 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19213 %}
19214 ins_pipe( pipe_slow );
19215 %}
19216
// Insert an integral GPR scalar into one lane of a 512-bit vector (AVX-512):
// extract the 128-bit quadrant holding the element, insert, write it back.
19217 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19218 predicate(Matcher::vector_length_in_bytes(n) == 64);
19219 match(Set dst (VectorInsert (Binary src val) idx));
19220 effect(TEMP vtmp);
19221 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19222 ins_encode %{
19223 assert(UseAVX > 2, "sanity");
19224
19225 BasicType elem_bt = Matcher::vector_element_basic_type(this);
// Elements per 128-bit lane and its log2, used to split idx into (lane, slot).
19226 int elem_per_lane = 16/type2aelembytes(elem_bt);
19227 int log2epr = log2(elem_per_lane);
19228
19229 assert(is_integral_type(elem_bt), "");
19230 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19231
19232 uint x_idx = $idx$$constant & right_n_bits(log2epr);
// Four 128-bit lanes in a 512-bit vector, hence the & 3.
19233 uint y_idx = ($idx$$constant >> log2epr) & 3;
19234 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19235 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19236 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19237 %}
19238 ins_pipe( pipe_slow );
19239 %}
19240
// Insert a long into a 2-element (128-bit) vector in place with pinsrq.
19241 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19242 predicate(Matcher::vector_length(n) == 2);
19243 match(Set dst (VectorInsert (Binary dst val) idx));
19244 format %{ "vector_insert $dst,$val,$idx" %}
19245 ins_encode %{
19246 assert(UseSSE >= 4, "required");
19247 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19248 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19249
19250 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19251 %}
19252 ins_pipe( pipe_slow );
19253 %}
19254
// Insert a long into a 4-element (256-bit) vector: extract the 128-bit lane
// holding the target element into vtmp, vpinsrq into it, write the lane back.
// x_idx is the slot within the lane (0..1), y_idx selects the lane (0..1).
19255 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19256 predicate(Matcher::vector_length(n) == 4);
19257 match(Set dst (VectorInsert (Binary src val) idx));
19258 effect(TEMP vtmp);
19259 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19260 ins_encode %{
19261 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19262 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19263
19264 uint x_idx = $idx$$constant & right_n_bits(1);
19265 uint y_idx = ($idx$$constant >> 1) & 1;
19267 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19268 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19269 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19270 %}
19271 ins_pipe( pipe_slow );
19272 %}
19273
// Insert a long into an 8-element (512-bit) vector: extract the 128-bit
// quadrant (y_idx in 0..3), vpinsrq into slot x_idx (0..1), write it back.
19274 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19275 predicate(Matcher::vector_length(n) == 8);
19276 match(Set dst (VectorInsert (Binary src val) idx));
19277 effect(TEMP vtmp);
19278 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19279 ins_encode %{
19280 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19281 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19282
19283 uint x_idx = $idx$$constant & right_n_bits(1);
19284 uint y_idx = ($idx$$constant >> 1) & 3;
19285 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19286 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19287 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19288 %}
19289 ins_pipe( pipe_slow );
19290 %}
19291
// Insert a float into a vector of fewer than 8 elements, in place via SSE4.1
// insertps; the slot index goes in bits 4..5 of the immediate (hence << 4).
19292 instruct insertF(vec dst, regF val, immU8 idx) %{
19293 predicate(Matcher::vector_length(n) < 8);
19294 match(Set dst (VectorInsert (Binary dst val) idx));
19295 format %{ "vector_insert $dst,$val,$idx" %}
19296 ins_encode %{
19297 assert(UseSSE >= 4, "sanity");
19298
19299 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19300 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19301
19302 uint x_idx = $idx$$constant & right_n_bits(2);
19303 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19304 %}
19305 ins_pipe( pipe_slow );
19306 %}
19307
// Insert a float into an 8- or 16-element (256-/512-bit) vector.
// Extract the 128-bit lane containing the element, vinsertps into slot x_idx
// (immediate bits 4..5, hence << 4), then write the lane back. For 256-bit
// vectors y_idx is one of two lanes; for 512-bit, one of four quadrants.
19308 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19309 predicate(Matcher::vector_length(n) >= 8);
19310 match(Set dst (VectorInsert (Binary src val) idx));
19311 effect(TEMP vtmp);
19312 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19313 ins_encode %{
19314 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19315 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19316
19317 int vlen = Matcher::vector_length(this);
19318 uint x_idx = $idx$$constant & right_n_bits(2);
19319 if (vlen == 8) {
19320 uint y_idx = ($idx$$constant >> 2) & 1;
19322 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19323 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19324 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19325 } else {
19326 assert(vlen == 16, "sanity");
19327 uint y_idx = ($idx$$constant >> 2) & 3;
19328 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19329 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19330 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19331 }
19332 %}
19333 ins_pipe( pipe_slow );
19334 %}
19335
// Insert a double into a 2-element (128-bit) vector: move the double to a GPR
// first (no direct xmm-to-xmm pinsr), then pinsrq in place.
19336 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19337 predicate(Matcher::vector_length(n) == 2);
19338 match(Set dst (VectorInsert (Binary dst val) idx));
19339 effect(TEMP tmp);
19340 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19341 ins_encode %{
19342 assert(UseSSE >= 4, "sanity");
19343 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19344 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19345
19346 __ movq($tmp$$Register, $val$$XMMRegister);
19347 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19348 %}
19349 ins_pipe( pipe_slow );
19350 %}
19351
// Insert a double into a 4-element (256-bit) vector. The double is first moved
// to a GPR (no xmm-to-xmm pinsr form), then the 128-bit lane holding the
// element is extracted, vpinsrq'd at slot x_idx, and written back at lane y_idx.
19352 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19353 predicate(Matcher::vector_length(n) == 4);
19354 match(Set dst (VectorInsert (Binary src val) idx));
19355 effect(TEMP vtmp, TEMP tmp);
19356 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19357 ins_encode %{
19358 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19359 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19360
19361 uint x_idx = $idx$$constant & right_n_bits(1);
19362 uint y_idx = ($idx$$constant >> 1) & 1;
19364 __ movq($tmp$$Register, $val$$XMMRegister);
19365 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19366 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19367 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19368 %}
19369 ins_pipe( pipe_slow );
19370 %}
19371
// Insert a double into an 8-element (512-bit) vector. The double is moved to a
// GPR, the 128-bit quadrant holding the element (y_idx in 0..3) is extracted,
// vpinsrq'd at slot x_idx (0..1), then written back.
// idx is immU8 for consistency with insert2D/insert4D and the other
// vector-insert rules; the assert below bounds it to the vector length.
19372 instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19373 predicate(Matcher::vector_length(n) == 8);
19374 match(Set dst (VectorInsert (Binary src val) idx));
19375 effect(TEMP tmp, TEMP vtmp);
19376 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19377 ins_encode %{
19378 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19379 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19380
19381 uint x_idx = $idx$$constant & right_n_bits(1);
19382 uint y_idx = ($idx$$constant >> 1) & 3;
19383 __ movq($tmp$$Register, $val$$XMMRegister);
19384 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19385 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19386 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19387 %}
19388 ins_pipe( pipe_slow );
19389 %}
19390
19391 // ====================REDUCTION ARITHMETIC=======================================
19392
19393 // =======================Int Reduction==========================================
19394
// Reduce an int vector (add/mul/and/or/xor/min/max, signed and unsigned
// min/max) into a scalar, folding in the scalar input src1. The actual
// lowering is dispatched on the ideal opcode inside MacroAssembler::reduceI.
19395 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19396 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19397 match(Set dst (AddReductionVI src1 src2));
19398 match(Set dst (MulReductionVI src1 src2));
19399 match(Set dst (AndReductionV src1 src2));
19400 match(Set dst ( OrReductionV src1 src2));
19401 match(Set dst (XorReductionV src1 src2));
19402 match(Set dst (MinReductionV src1 src2));
19403 match(Set dst (MaxReductionV src1 src2));
19404 match(Set dst (UMinReductionV src1 src2));
19405 match(Set dst (UMaxReductionV src1 src2));
19406 effect(TEMP vtmp1, TEMP vtmp2);
19407 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19408 ins_encode %{
19409 int opcode = this->ideal_Opcode();
19410 int vlen = Matcher::vector_length(this, $src2);
19411 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19412 %}
19413 ins_pipe( pipe_slow );
19414 %}
19415
19416 // =======================Long Reduction==========================================
19417
// Long reduction without AVX512DQ: legVec operands keep the allocator to the
// lower (legacy-encodable) XMM registers.
19418 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19419 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19420 match(Set dst (AddReductionVL src1 src2));
19421 match(Set dst (MulReductionVL src1 src2));
19422 match(Set dst (AndReductionV src1 src2));
19423 match(Set dst ( OrReductionV src1 src2));
19424 match(Set dst (XorReductionV src1 src2));
19425 match(Set dst (MinReductionV src1 src2));
19426 match(Set dst (MaxReductionV src1 src2));
19427 match(Set dst (UMinReductionV src1 src2));
19428 match(Set dst (UMaxReductionV src1 src2));
19429 effect(TEMP vtmp1, TEMP vtmp2);
19430 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19431 ins_encode %{
19432 int opcode = this->ideal_Opcode();
19433 int vlen = Matcher::vector_length(this, $src2);
19434 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19435 %}
19436 ins_pipe( pipe_slow );
19437 %}
19438
// Long reduction with AVX512DQ: plain vec operands allow the full register file.
19439 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19440 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19441 match(Set dst (AddReductionVL src1 src2));
19442 match(Set dst (MulReductionVL src1 src2));
19443 match(Set dst (AndReductionV src1 src2));
19444 match(Set dst ( OrReductionV src1 src2));
19445 match(Set dst (XorReductionV src1 src2));
19446 match(Set dst (MinReductionV src1 src2));
19447 match(Set dst (MaxReductionV src1 src2));
19448 match(Set dst (UMinReductionV src1 src2));
19449 match(Set dst (UMaxReductionV src1 src2));
19450 effect(TEMP vtmp1, TEMP vtmp2);
19451 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19452 ins_encode %{
19453 int opcode = this->ideal_Opcode();
19454 int vlen = Matcher::vector_length(this, $src2);
19455 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19456 %}
19457 ins_pipe( pipe_slow );
19458 %}
19459
19460 // =======================Float Reduction==========================================
19461
// Strictly-ordered float add/mul reduction, <=4 elements. dst is both the
// incoming accumulator and the result, so it is also a TEMP.
19462 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19463 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19464 match(Set dst (AddReductionVF dst src));
19465 match(Set dst (MulReductionVF dst src));
19466 effect(TEMP dst, TEMP vtmp);
19467 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19468 ins_encode %{
19469 int opcode = this->ideal_Opcode();
19470 int vlen = Matcher::vector_length(this, $src);
19471 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19472 %}
19473 ins_pipe( pipe_slow );
19474 %}
19475
// Strictly-ordered float reduction, 8 elements (256-bit); needs two temps.
19476 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19477 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19478 match(Set dst (AddReductionVF dst src));
19479 match(Set dst (MulReductionVF dst src));
19480 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19481 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19482 ins_encode %{
19483 int opcode = this->ideal_Opcode();
19484 int vlen = Matcher::vector_length(this, $src);
19485 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19486 %}
19487 ins_pipe( pipe_slow );
19488 %}
19489
// Strictly-ordered float reduction, 16 elements (512-bit); legVec operands.
19490 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19491 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19492 match(Set dst (AddReductionVF dst src));
19493 match(Set dst (MulReductionVF dst src));
19494 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19495 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19496 ins_encode %{
19497 int opcode = this->ideal_Opcode();
19498 int vlen = Matcher::vector_length(this, $src);
19499 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19500 %}
19501 ins_pipe( pipe_slow );
19502 %}
19503
19504
19505 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19506 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19507 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19508 // src1 contains reduction identity
19509 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19510 match(Set dst (AddReductionVF src1 src2));
19511 match(Set dst (MulReductionVF src1 src2));
19512 effect(TEMP dst);
19513 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19514 ins_encode %{
19515 int opcode = this->ideal_Opcode();
19516 int vlen = Matcher::vector_length(this, $src2);
19517 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19518 %}
19519 ins_pipe( pipe_slow );
19520 %}
19521
19522 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19523 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19524 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19525 // src1 contains reduction identity
19526 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19527 match(Set dst (AddReductionVF src1 src2));
19528 match(Set dst (MulReductionVF src1 src2));
19529 effect(TEMP dst, TEMP vtmp);
19530 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19531 ins_encode %{
19532 int opcode = this->ideal_Opcode();
19533 int vlen = Matcher::vector_length(this, $src2);
19534 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19535 %}
19536 ins_pipe( pipe_slow );
19537 %}
19538
19539 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19540 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19541 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19542 // src1 contains reduction identity
19543 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19544 match(Set dst (AddReductionVF src1 src2));
19545 match(Set dst (MulReductionVF src1 src2));
19546 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19547 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19548 ins_encode %{
19549 int opcode = this->ideal_Opcode();
19550 int vlen = Matcher::vector_length(this, $src2);
19551 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19552 %}
19553 ins_pipe( pipe_slow );
19554 %}
19555
19556 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19557 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19558 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19559 // src1 contains reduction identity
19560 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19561 match(Set dst (AddReductionVF src1 src2));
19562 match(Set dst (MulReductionVF src1 src2));
19563 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19564 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19565 ins_encode %{
19566 int opcode = this->ideal_Opcode();
19567 int vlen = Matcher::vector_length(this, $src2);
19568 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19569 %}
19570 ins_pipe( pipe_slow );
19571 %}
19572
19573 // =======================Double Reduction==========================================
19574
// Strictly-ordered double add/mul reduction, 2 elements. dst is both the
// incoming accumulator and the result, so it is also a TEMP.
19575 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19576 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19577 match(Set dst (AddReductionVD dst src));
19578 match(Set dst (MulReductionVD dst src));
19579 effect(TEMP dst, TEMP vtmp);
19580 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19581 ins_encode %{
19582 int opcode = this->ideal_Opcode();
19583 int vlen = Matcher::vector_length(this, $src);
19584 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19585 %}
19586 ins_pipe( pipe_slow );
19587 %}
19588
// Strictly-ordered double reduction, 4 elements (256-bit); needs two temps.
19589 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19590 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19591 match(Set dst (AddReductionVD dst src));
19592 match(Set dst (MulReductionVD dst src));
19593 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19594 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19595 ins_encode %{
19596 int opcode = this->ideal_Opcode();
19597 int vlen = Matcher::vector_length(this, $src);
19598 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19599 %}
19600 ins_pipe( pipe_slow );
19601 %}
19602
// Strictly-ordered double reduction, 8 elements (512-bit); legVec operands.
19603 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19604 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19605 match(Set dst (AddReductionVD dst src));
19606 match(Set dst (MulReductionVD dst src));
19607 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19608 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19609 ins_encode %{
19610 int opcode = this->ideal_Opcode();
19611 int vlen = Matcher::vector_length(this, $src);
19612 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19613 %}
19614 ins_pipe( pipe_slow );
19615 %}
19616
19617 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19618 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19619 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19620 // src1 contains reduction identity
19621 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19622 match(Set dst (AddReductionVD src1 src2));
19623 match(Set dst (MulReductionVD src1 src2));
19624 effect(TEMP dst);
19625 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19626 ins_encode %{
19627 int opcode = this->ideal_Opcode();
19628 int vlen = Matcher::vector_length(this, $src2);
19629 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19630 %}
19631 ins_pipe( pipe_slow );
19632 %}
19633
19634 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19635 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19636 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19637 // src1 contains reduction identity
19638 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19639 match(Set dst (AddReductionVD src1 src2));
19640 match(Set dst (MulReductionVD src1 src2));
19641 effect(TEMP dst, TEMP vtmp);
19642 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19643 ins_encode %{
19644 int opcode = this->ideal_Opcode();
19645 int vlen = Matcher::vector_length(this, $src2);
19646 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19647 %}
19648 ins_pipe( pipe_slow );
19649 %}
19650
19651 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19652 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19653 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19654 // src1 contains reduction identity
19655 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19656 match(Set dst (AddReductionVD src1 src2));
19657 match(Set dst (MulReductionVD src1 src2));
19658 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19659 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19660 ins_encode %{
19661 int opcode = this->ideal_Opcode();
19662 int vlen = Matcher::vector_length(this, $src2);
19663 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19664 %}
19665 ins_pipe( pipe_slow );
19666 %}
19667
19668 // =======================Byte Reduction==========================================
19669
19670 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19671 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19672 match(Set dst (AddReductionVI src1 src2));
19673 match(Set dst (AndReductionV src1 src2));
19674 match(Set dst ( OrReductionV src1 src2));
19675 match(Set dst (XorReductionV src1 src2));
19676 match(Set dst (MinReductionV src1 src2));
19677 match(Set dst (MaxReductionV src1 src2));
19678 match(Set dst (UMinReductionV src1 src2));
19679 match(Set dst (UMaxReductionV src1 src2));
19680 effect(TEMP vtmp1, TEMP vtmp2);
19681 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19682 ins_encode %{
19683 int opcode = this->ideal_Opcode();
19684 int vlen = Matcher::vector_length(this, $src2);
19685 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19686 %}
19687 ins_pipe( pipe_slow );
19688 %}
19689
// Byte-vector reduction, AVX512BW path: same matches as reductionB above but
// with plain vec operands (full EVEX register set available when AVX512BW is
// supported).  vtmp1/vtmp2 are clobbered scratch.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19709
19710 // =======================Short Reduction==========================================
19711
// Short-vector reduction: folds all lanes of src2 into the scalar accumulator
// src1; unlike the byte case this also covers MulReductionVI, and a single
// rule serves all CPU levels (no AVX512BW split).  vtmp1/vtmp2 are scratch.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19732
19733 // =======================Mul Reduction==========================================
19734
// Byte multiply-reduction for vectors of up to 32 bytes.  dst is also a TEMP
// because mulreduceB uses it as working storage before the final result.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19748
// Byte multiply-reduction for 64-byte (512-bit) vectors; legVec operands.
// Same macro as mul_reductionB, selected by vector length in the predicate.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19762
19763 //--------------------Min/Max Float Reduction --------------------
19764 // Float Min Reduction
// 2-lane float min/max reduction, pre-AVX10.2 path.  The predicate requires
// src1 to be the reduction's identity constant (+Inf for min, -Inf for max),
// so the encoding ignores src1 and reduces src2 alone.  Clobbers flags.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    // 'false' = non-accumulator form: dst does not feed into the reduction.
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19785
// Float min/max reduction for 4+ lanes, pre-AVX10.2 path.  As above, src1
// must be the identity constant and is not materialized; one extra scratch
// register (xmm_0) is needed for the wider vectors.  Clobbers flags.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19806
// Accumulator ("_av") form of the 2-lane float min/max reduction: dst is both
// the running accumulator and the result (match "Set dst (Min/MaxReductionV
// dst src)").  Pre-AVX10.2 path; clobbers flags.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    // 'true' selects the accumulator flavor of the macro (dst participates).
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19825
19826
// Accumulator form of the float min/max reduction for 4+ lanes, pre-AVX10.2
// path.  dst carries the running value into and out of the reduction.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19845
// AVX10.2 variant of the 2-lane float min/max reduction: needs only one
// scratch vector, and the legacy tmp/atmp/btmp slots are passed as xnoreg.
// src1 must still be the identity constant (+Inf/-Inf) and is not emitted.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19863
// AVX10.2 float min/max reduction for 4+ lanes; two scratch vectors, unused
// legacy temps passed as xnoreg.  src1 must be the matching identity constant.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19881
// AVX10.2 accumulator form of the 2-lane float min/max reduction: dst is both
// the running accumulator and the result; xtmp1 is the only scratch needed.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19897
// AVX10.2 accumulator form of the float min/max reduction for 4+ lanes:
// dst is both the running accumulator and the result (match pattern
// "Set dst (Min/MaxReductionV dst src)").  xtmp1/xtmp2 are scratch; the
// unused legacy temp slots are passed to the macro as xnoreg.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  // Debug format fixed: this is the >= 4-lane variant, so it prints
  // "minmaxF" like minmax_reductionF_av, not the 2-lane "minmax2F".
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    // 'true' selects the accumulator flavor of the macro (dst participates).
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19913
19914 //--------------------Min Double Reduction --------------------
// 2-lane double min/max reduction, pre-AVX10.2 path.  The predicate requires
// src1 to be the identity constant (+Inf for min, -Inf for max), so the
// encoding ignores src1 and reduces src2 alone.  Clobbers flags.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19935
// Double min/max reduction for 4+ lanes, pre-AVX10.2 path.  src1 must be the
// identity constant; one extra scratch (tmp5) is needed for wider vectors.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19956
19957
// Accumulator ("_av") form of the 2-lane double min/max reduction: dst is
// both the running accumulator and the result.  Pre-AVX10.2 path.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    // 'true' selects the accumulator flavor of the macro (dst participates).
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19976
// Accumulator form of the double min/max reduction for 4+ lanes,
// pre-AVX10.2 path.  dst carries the running value into and out of it.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19995
// AVX10.2 variant of the 2-lane double min/max reduction: one scratch vector,
// unused legacy temps passed as xnoreg.  src1 must be the identity constant.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
                          xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20013
// AVX10.2 double min/max reduction for 4+ lanes; two scratch vectors, unused
// legacy temps passed as xnoreg.  src1 must be the matching identity constant.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20031
20032
// AVX10.2 accumulator form of the 2-lane double min/max reduction: dst is
// both the running accumulator and the result; xtmp1 is the only scratch.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20048
// AVX10.2 accumulator form of the double min/max reduction for 4+ lanes:
// dst is both the running accumulator and the result; xtmp1/xtmp2 scratch.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20064
20065 // ====================VECTOR ARITHMETIC=======================================
20066
20067 // --------------------------------- ADD --------------------------------------
20068
20069 // Bytes vector add
// Packed byte add, SSE-only path: two-operand destructive form (dst += src).
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20079
// Packed byte add, AVX three-operand form; vector_length_encoding() selects
// the 128/256/512-bit encoding from this node's vector length.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20090
// Packed byte add with the second operand folded from memory (AVX only);
// restricted to vectors wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20102
20103 // Shorts/Chars vector add
// Packed short/char add, SSE-only destructive form (dst += src).
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20113
// Packed short/char add, AVX three-operand form.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20124
// Packed short/char add with a memory operand (AVX, vectors > 8 bytes).
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20136
20137 // Integers vector add
// Packed int add, SSE-only destructive form (dst += src).
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20147
// Packed int add, AVX three-operand form.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20158
20159
// Packed int add with a memory operand (AVX, vectors > 8 bytes).
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20171
20172 // Longs vector add
// Packed long add, SSE-only destructive form (dst += src).
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packedL" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20182
// Packed long add, AVX three-operand form.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20193
// Packed long add with a memory operand (AVX, vectors > 8 bytes).
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20205
20206 // Floats vector add
// Packed float add, SSE-only destructive form (dst += src).
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packedF" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20216
// Packed float add, AVX three-operand form.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20227
// Packed float add with a memory operand (AVX, vectors > 8 bytes).
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20239
20240 // Doubles vector add
// Packed double add, SSE-only destructive form (dst += src).
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packedD" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20250
// Packed double add, AVX three-operand form.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20261
// Packed double add with a memory operand (AVX, vectors > 8 bytes).
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20273
20274 // --------------------------------- SUB --------------------------------------
20275
20276 // Bytes vector sub
// Packed byte subtract, SSE-only destructive form (dst -= src).
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packedB" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20286
// Packed byte subtract, AVX three-operand form (dst = src1 - src2).
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20297
// Packed byte subtract with the subtrahend folded from memory (AVX, > 8 bytes).
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20309
20310 // Shorts/Chars vector sub
// Packed short/char subtract, SSE-only destructive form (dst -= src).
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packedS" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20320
20321
// Packed short/char subtract, AVX three-operand form (dst = src1 - src2).
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20332
// Packed short/char subtract with a memory operand (AVX, vectors > 8 bytes).
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20344
20345 // Integers vector sub
// Packed int subtract, SSE-only destructive form (dst -= src).
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packedI" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20355
// Packed int subtract, AVX three-operand form (dst = src1 - src2).
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20366
// Packed int subtract with a memory operand (AVX, vectors > 8 bytes).
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20378
20379 // Longs vector sub
// Packed long subtract, SSE-only destructive form (dst -= src).
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq   $dst,$src\t! sub packedL" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20389
// Packed long subtract, AVX three-operand form (dst = src1 - src2).
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20400
20401
// Packed long subtract with a memory operand (AVX, vectors > 8 bytes).
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20413
20414 // Floats vector sub
// Packed float subtract, SSE-only destructive form (dst -= src).
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20424
// Packed float subtract, AVX three-operand form (dst = src1 - src2).
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20435
// Packed float subtract with a memory operand (AVX, vectors > 8 bytes).
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20447
20448 // Doubles vector sub
// Packed double subtract, SSE-only destructive form (dst -= src).
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd   $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20458
20459 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20460 predicate(UseAVX > 0);
20461 match(Set dst (SubVD src1 src2));
20462 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20463 ins_encode %{
20464 int vlen_enc = vector_length_encoding(this);
20465 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20466 %}
20467 ins_pipe( pipe_slow );
20468 %}
20469
20470 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20471 predicate((UseAVX > 0) &&
20472 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20473 match(Set dst (SubVD src (LoadVector mem)));
20474 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20475 ins_encode %{
20476 int vlen_enc = vector_length_encoding(this);
20477 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20478 %}
20479 ins_pipe( pipe_slow );
20480 %}
20481
20482 // --------------------------------- MUL --------------------------------------
20483
20484 // Byte vector mul
20485 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20486 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20487 match(Set dst (MulVB src1 src2));
20488 effect(TEMP dst, TEMP xtmp);
20489 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20490 ins_encode %{
20491 assert(UseSSE > 3, "required");
20492 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20493 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20494 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20495 __ psllw($dst$$XMMRegister, 8);
20496 __ psrlw($dst$$XMMRegister, 8);
20497 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20498 %}
20499 ins_pipe( pipe_slow );
20500 %}
20501
20502 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20503 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20504 match(Set dst (MulVB src1 src2));
20505 effect(TEMP dst, TEMP xtmp);
20506 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20507 ins_encode %{
20508 assert(UseSSE > 3, "required");
20509 // Odd-index elements
20510 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20511 __ psrlw($dst$$XMMRegister, 8);
20512 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20513 __ psrlw($xtmp$$XMMRegister, 8);
20514 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20515 __ psllw($dst$$XMMRegister, 8);
20516 // Even-index elements
20517 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20518 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20519 __ psllw($xtmp$$XMMRegister, 8);
20520 __ psrlw($xtmp$$XMMRegister, 8);
20521 // Combine
20522 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20523 %}
20524 ins_pipe( pipe_slow );
20525 %}
20526
20527 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20528 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20529 match(Set dst (MulVB src1 src2));
20530 effect(TEMP xtmp1, TEMP xtmp2);
20531 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20532 ins_encode %{
20533 int vlen_enc = vector_length_encoding(this);
20534 // Odd-index elements
20535 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20536 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20537 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20538 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20539 // Even-index elements
20540 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20541 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20542 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20543 // Combine
20544 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20545 %}
20546 ins_pipe( pipe_slow );
20547 %}
20548
// Shorts/Chars vector mul

// SSE two-operand form: dst *= src (packed 16-bit lanes, low halves kept).
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form: dst = src1 * src2.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the load folded into the multiply: dst = src * load(mem).
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20582
// Integers vector mul

// SSE two-operand form; pmulld is SSE4.1, hence the UseSSE > 3 assert.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form: dst = src1 * src2.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the load folded into the multiply: dst = src * load(mem).
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20617
// Longs vector mul

// Direct 64-bit multiply via AVX512DQ's evpmullq: either a 512-bit vector
// with AVX512DQ, or any length when the VL+DQ combination is available.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above with the load folded into the multiply; the VL+DQ branch
// additionally requires a vector wider than 8 bytes for the memory form.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// SSE fallback: no packed 64x64 multiply, so compose it from 32-bit pieces:
//   lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products, only the lower 32 bits is in concerns
    // pshufd 0xB1 swaps the 32-bit halves of each 64-bit lane.
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    // Cross products contribute only to the upper 32 bits of each lane.
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX variant of the same 32-bit decomposition, used when evpmullq is not
// available for this vector length.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products, only the lower 32 bits is in concerns
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Cheap special case: both inputs are known to be zero-extended 32-bit
// values (has_uint_inputs), so a single vpmuludq yields the full product.
// Lower ins_cost makes the matcher prefer this over the general rules.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Cheap special case: both inputs are known to be sign-extended 32-bit
// values (has_int_inputs), so a single vpmuldq yields the full product.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20720
// Floats vector mul

// SSE two-operand form: dst *= src.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form: dst = src1 * src2.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the load folded into the multiply: dst = src * load(mem).
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20754
// Doubles vector mul

// SSE two-operand form: dst *= src.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX three-operand form: dst = src1 * src2.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form with the load folded into the multiply: dst = src * load(mem).
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20788
20789 // --------------------------------- DIV --------------------------------------
20790
20791 // Floats vector div
20792 instruct vdivF(vec dst, vec src) %{
20793 predicate(UseAVX == 0);
20794 match(Set dst (DivVF dst src));
20795 format %{ "divps $dst,$src\t! div packedF" %}
20796 ins_encode %{
20797 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20798 %}
20799 ins_pipe( pipe_slow );
20800 %}
20801
20802 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20803 predicate(UseAVX > 0);
20804 match(Set dst (DivVF src1 src2));
20805 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20806 ins_encode %{
20807 int vlen_enc = vector_length_encoding(this);
20808 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20809 %}
20810 ins_pipe( pipe_slow );
20811 %}
20812
20813 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20814 predicate((UseAVX > 0) &&
20815 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20816 match(Set dst (DivVF src (LoadVector mem)));
20817 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20818 ins_encode %{
20819 int vlen_enc = vector_length_encoding(this);
20820 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20821 %}
20822 ins_pipe( pipe_slow );
20823 %}
20824
20825 // Doubles vector div
20826 instruct vdivD(vec dst, vec src) %{
20827 predicate(UseAVX == 0);
20828 match(Set dst (DivVD dst src));
20829 format %{ "divpd $dst,$src\t! div packedD" %}
20830 ins_encode %{
20831 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20832 %}
20833 ins_pipe( pipe_slow );
20834 %}
20835
20836 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20837 predicate(UseAVX > 0);
20838 match(Set dst (DivVD src1 src2));
20839 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20840 ins_encode %{
20841 int vlen_enc = vector_length_encoding(this);
20842 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20843 %}
20844 ins_pipe( pipe_slow );
20845 %}
20846
20847 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20848 predicate((UseAVX > 0) &&
20849 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20850 match(Set dst (DivVD src (LoadVector mem)));
20851 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20852 ins_encode %{
20853 int vlen_enc = vector_length_encoding(this);
20854 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20855 %}
20856 ins_pipe( pipe_slow );
20857 %}
20858
20859 // ------------------------------ MinMax ---------------------------------------
20860
20861 // Byte, Short, Int vector Min/Max
20862 instruct minmax_reg_sse(vec dst, vec src) %{
20863 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20864 UseAVX == 0);
20865 match(Set dst (MinV dst src));
20866 match(Set dst (MaxV dst src));
20867 format %{ "vector_minmax $dst,$src\t! " %}
20868 ins_encode %{
20869 assert(UseSSE >= 4, "required");
20870
20871 int opcode = this->ideal_Opcode();
20872 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20873 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20874 %}
20875 ins_pipe( pipe_slow );
20876 %}
20877
20878 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20879 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20880 UseAVX > 0);
20881 match(Set dst (MinV src1 src2));
20882 match(Set dst (MaxV src1 src2));
20883 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20884 ins_encode %{
20885 int opcode = this->ideal_Opcode();
20886 int vlen_enc = vector_length_encoding(this);
20887 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20888
20889 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20890 %}
20891 ins_pipe( pipe_slow );
20892 %}
20893
// Long vector Min/Max

// SSE form for a single 128-bit vector. The helper needs xmm0 as a fixed
// scratch register (rxmm0), presumably for a blend mask — see pminmax.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX/AVX2 form for <= 32-byte vectors without AVX512VL; legVec operands
// keep the allocator away from the upper (EVEX-only) xmm registers.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20931
20932 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20933 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20934 Matcher::vector_element_basic_type(n) == T_LONG);
20935 match(Set dst (MinV src1 src2));
20936 match(Set dst (MaxV src1 src2));
20937 format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
20938 ins_encode %{
20939 assert(UseAVX > 2, "required");
20940
20941 int vlen_enc = vector_length_encoding(this);
20942 int opcode = this->ideal_Opcode();
20943 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20944 assert(elem_bt == T_LONG, "sanity");
20945
20946 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20947 %}
20948 ins_pipe( pipe_slow );
20949 %}
20950
// Float/Double vector Min/Max
// AVX10.2 provides a direct minmax instruction with the required NaN and
// signed-zero semantics, so no temporaries are needed (k0 = no masking).
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP $dst, $a, $b" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Float/Double vector Min/Max
// Pre-AVX10.2 path for <= 32-byte vectors: the helper composes the correct
// NaN/-0.0 handling out of compares and blends, hence the three temps.
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vminmax_fp(opcode, elem_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Pre-AVX10.2 EVEX path for 512-bit vectors: uses an opmask register
// instead of a vector blend mask.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21010
21011 // ------------------------------ Unsigned vector Min/Max ----------------------
21012
21013 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21014 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21015 match(Set dst (UMinV a b));
21016 match(Set dst (UMaxV a b));
21017 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21018 ins_encode %{
21019 int opcode = this->ideal_Opcode();
21020 int vlen_enc = vector_length_encoding(this);
21021 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21022 assert(is_integral_type(elem_bt), "");
21023 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21024 %}
21025 ins_pipe( pipe_slow );
21026 %}
21027
21028 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21029 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21030 match(Set dst (UMinV a (LoadVector b)));
21031 match(Set dst (UMaxV a (LoadVector b)));
21032 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21033 ins_encode %{
21034 int opcode = this->ideal_Opcode();
21035 int vlen_enc = vector_length_encoding(this);
21036 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21037 assert(is_integral_type(elem_bt), "");
21038 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21039 %}
21040 ins_pipe( pipe_slow );
21041 %}
21042
21043 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21044 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21045 match(Set dst (UMinV a b));
21046 match(Set dst (UMaxV a b));
21047 effect(TEMP xtmp1, TEMP xtmp2);
21048 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21049 ins_encode %{
21050 int opcode = this->ideal_Opcode();
21051 int vlen_enc = vector_length_encoding(this);
21052 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21053 %}
21054 ins_pipe( pipe_slow );
21055 %}
21056
// Predicated (opmask) unsigned min/max: lanes where $mask is clear keep the
// value already in $dst (merge-masking, signalled by the 'true' argument).
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked unsigned min/max with the second operand loaded from memory.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21084
21085 // --------------------------------- Signum/CopySign ---------------------------
21086
21087 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21088 match(Set dst (SignumF dst (Binary zero one)));
21089 effect(KILL cr);
21090 format %{ "signumF $dst, $dst" %}
21091 ins_encode %{
21092 int opcode = this->ideal_Opcode();
21093 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21094 %}
21095 ins_pipe( pipe_slow );
21096 %}
21097
21098 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21099 match(Set dst (SignumD dst (Binary zero one)));
21100 effect(KILL cr);
21101 format %{ "signumD $dst, $dst" %}
21102 ins_encode %{
21103 int opcode = this->ideal_Opcode();
21104 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21105 %}
21106 ins_pipe( pipe_slow );
21107 %}
21108
// Vector signum, AVX path (no AVX512VL, <= 32-byte vectors): uses a vector
// temporary for the intermediate compare/blend masks.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector signum, EVEX path (AVX512VL or 512-bit vectors): an opmask
// temporary replaces the vector mask.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21138
21139 // ---------------------------------------
21140 // For copySign use 0xE4 as writemask for vpternlog
21141 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21142 // C (xmm2) is set to 0x7FFFFFFF
21143 // Wherever xmm2 is 0, we want to pick from B (sign)
21144 // Wherever xmm2 is 1, we want to pick from A (src)
21145 //
21146 // A B C Result
21147 // 0 0 0 0
21148 // 0 0 1 0
21149 // 0 1 0 1
21150 // 0 1 1 0
21151 // 1 0 0 0
21152 // 1 0 1 1
21153 // 1 1 0 1
21154 // 1 1 1 1
21155 //
21156 // Result going from high bit to low bit is 0x11100100 = 0xe4
21157 // ---------------------------------------
21158
// Scalar float copySign via vpternlog (see truth-table comment above):
// tmp1 = 0x7FFFFFFF selects magnitude bits from $src and the sign bit
// from $dst. Note: $dst carries the sign, $src the magnitude, per the
// matched (CopySignF dst src) shape.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // Materialize the magnitude mask (all bits except the sign bit).
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

// Scalar double copySign, same 0xE4 vpternlog scheme with a 64-bit mask.
// The immD zero operand restricts this rule to the +0.0 sign-source shape;
// its value is not used in the encoding.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // Materialize the 64-bit magnitude mask (all bits except the sign bit).
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21183
21184 //----------------------------- CompressBits/ExpandBits ------------------------
21185
21186 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21187 predicate(n->bottom_type()->isa_int());
21188 match(Set dst (CompressBits src mask));
21189 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21190 ins_encode %{
21191 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21192 %}
21193 ins_pipe( pipe_slow );
21194 %}
21195
21196 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21197 predicate(n->bottom_type()->isa_int());
21198 match(Set dst (ExpandBits src mask));
21199 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21200 ins_encode %{
21201 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21202 %}
21203 ins_pipe( pipe_slow );
21204 %}
21205
21206 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21207 predicate(n->bottom_type()->isa_int());
21208 match(Set dst (CompressBits src (LoadI mask)));
21209 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21210 ins_encode %{
21211 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21212 %}
21213 ins_pipe( pipe_slow );
21214 %}
21215
21216 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21217 predicate(n->bottom_type()->isa_int());
21218 match(Set dst (ExpandBits src (LoadI mask)));
21219 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21220 ins_encode %{
21221 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21222 %}
21223 ins_pipe( pipe_slow );
21224 %}
21225
21226 // --------------------------------- Sqrt --------------------------------------
21227
21228 instruct vsqrtF_reg(vec dst, vec src) %{
21229 match(Set dst (SqrtVF src));
21230 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21231 ins_encode %{
21232 assert(UseAVX > 0, "required");
21233 int vlen_enc = vector_length_encoding(this);
21234 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21235 %}
21236 ins_pipe( pipe_slow );
21237 %}
21238
21239 instruct vsqrtF_mem(vec dst, memory mem) %{
21240 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21241 match(Set dst (SqrtVF (LoadVector mem)));
21242 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21243 ins_encode %{
21244 assert(UseAVX > 0, "required");
21245 int vlen_enc = vector_length_encoding(this);
21246 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21247 %}
21248 ins_pipe( pipe_slow );
21249 %}
21250
21251 // Floating point vector sqrt
21252 instruct vsqrtD_reg(vec dst, vec src) %{
21253 match(Set dst (SqrtVD src));
21254 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21255 ins_encode %{
21256 assert(UseAVX > 0, "required");
21257 int vlen_enc = vector_length_encoding(this);
21258 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21259 %}
21260 ins_pipe( pipe_slow );
21261 %}
21262
21263 instruct vsqrtD_mem(vec dst, memory mem) %{
21264 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21265 match(Set dst (SqrtVD (LoadVector mem)));
21266 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21267 ins_encode %{
21268 assert(UseAVX > 0, "required");
21269 int vlen_enc = vector_length_encoding(this);
21270 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21271 %}
21272 ins_pipe( pipe_slow );
21273 %}
21274
21275 // ------------------------------ Shift ---------------------------------------
21276
21277 // Left and right shift count vectors are the same on x86
21278 // (only lowest bits of xmm reg are used for count).
// Materialize a scalar shift count into an XMM register for vector shifts.
// One instruct serves both LShiftCntV and RShiftCntV since x86 packed-shift
// instructions read the count the same way for either direction.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl    $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
21288
21289 // Byte vector shift
// Byte vector shift
// Non-variable byte shift for vectors of at most 8 bytes: widen bytes to
// words (signed for arithmetic/left shift, unsigned for logical right),
// shift as words, mask back down to byte range, then pack to bytes.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    // URShiftVB is the only unsigned variant; it must zero-extend.
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    // Keep only the low byte of each word before packing.
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Non-variable 16-byte shift without AVX2: process the low and high 8 bytes
// separately (each widened to a full word vector), then merge with packuswb.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    // Low half: widen to words and shift.
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    // High half: move upper 8 bytes down (pshufd 0xE), widen and shift.
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    // Mask each word to its low byte and pack both halves into dst.
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21334
// Non-variable 16-byte shift with AVX2: widen all 16 bytes to one 256-bit
// word vector, shift once, mask, then pack the two 128-bit halves back down.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    // Keep only the low byte of each word, then pack high and low 128-bit
    // lanes into a single 16-byte result.
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

// Non-variable 32-byte shift with AVX2: widen each 16-byte half to a 256-bit
// word vector, shift and mask both, pack, and fix lane order with vpermq
// (vpackuswb packs within 128-bit lanes, so 0xD8 restores element order).
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21380
// Non-variable 64-byte shift with AVX-512: widen each 32-byte half to a
// 512-bit word vector, shift both, mask to byte range, pack, then reorder
// the packed qwords via the byte-perm table to restore element order.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    // Broadcast the 0x00ff word mask across the full 512-bit register.
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    // Cross-lane permutation to undo the in-lane packing.
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21408
// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before shifting. But char vectors are fine since
// chars are unsigned values.
21413 // Shorts/Chars vector left shift
// Non-variable short/char vector shift. With AVX the three-operand form is
// used directly; on SSE the source is first copied into dst (using the
// narrowest move that covers the vector: 4/8/16 bytes) and shifted in place.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        // 2 shorts = 4 bytes: a float-sized move suffices.
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        // 4 shorts = 8 bytes: a double-sized move suffices.
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21443
21444 // Integers vector left shift
// Non-variable int vector shift with the count in an XMM register.
// AVX uses the three-operand form; SSE copies src into dst first.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        // 2 ints = 8 bytes: a double-sized move suffices.
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left constant shift
// Int vector shift by a compile-time constant count (immediate form).
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
21497
21498 // Longs vector shift
// Non-variable long vector shift. Only left and logical right shifts are
// matched here; arithmetic right shift of longs is handled by the dedicated
// vshiftL_arith_* instructs below.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      // Without AVX only the 2-element (128-bit) case is possible.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector constant shift
// Long vector shift by a compile-time constant count (immediate form).
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21537
21538 // -------------------ArithmeticRightShift -----------------------------------
21539 // Long vector arithmetic right shift
// Arithmetic right shift of longs without AVX-512 (no native 64-bit
// arithmetic shift): shift logically, then xor/subtract an equally-shifted
// sign-bit mask to propagate the sign into the vacated high bits.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      // tmp = sign mask >> shift; (x ^ m) - m sign-extends the result.
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Arithmetic right shift of longs with AVX-512: evpsraq does it natively,
// so no sign-mask emulation or temporary register is needed.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21578
21579 // ------------------- Variable Shift -----------------------------
21580 // Byte variable shift
// Variable (per-element count) byte shift for <= 8 bytes without AVX512BW:
// the varshiftbw helper shifts byte elements widened to words; pack back.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

// Variable byte shift for 16 bytes without AVX512BW: shift each 8-byte half
// separately as words, then pack the two word results into one byte vector.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21628
// Variable byte shift for 32 bytes without AVX512BW: each 128-bit half is
// processed as two 8-byte quarters (via varshiftbw), packed, and the two
// 128-bit results are merged with vinserti128.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21664
// Variable byte shift (up to 32 bytes) with AVX512BW: a single call to the
// evarshiftb macro-assembler helper does the whole job.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Variable byte shift for 64 bytes with AVX512BW: process the low and high
// 256-bit halves separately and recombine with vinserti64x4.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21706
21707 // Short variable shift
// Short variable shift
// Variable short shift for <= 8 elements without AVX512BW: widen shorts to
// ints (sign-extended except for URShiftVS; counts always zero-extended),
// variable-shift as ints, mask to short range, and pack back down.
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    // Keep only the low 16 bits of each int before packing back to shorts.
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

// Variable short shift for 16 elements without AVX512BW: shift each 128-bit
// half as a 256-bit int vector, pack both, and fix lane order with vpermq.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    // vpackusdw packs within 128-bit lanes; vpermq 0xD8 restores order.
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21768
// Variable short shift with AVX512BW: varshiftw handles it directly. When
// AVX512VL is absent, narrow vectors must still be encoded as 512-bit ops.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21788
// Integer variable shift: one per-element count per lane, via varshiftd.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift (left and logical right only): via varshiftq.
// Arithmetic right shift of longs is matched by vshiftL_arith_var* below.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21821
// Long variable arithmetic right shift on AVX2 only: the varshiftq helper
// needs a temporary vector register to emulate the missing 64-bit
// arithmetic shift (AVX-512 has it natively; see the _evex variant below).
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21838
// Long variable arithmetic right shift with AVX-512: varshiftq needs no
// temporary register here, unlike the AVX2 variant above.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  // Fixed typo ("varfshift") so the debug format matches vshiftL_var's
  // "vector_varshift_long" spelling.
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21851
21852 // --------------------------------- AND --------------------------------------
21853
// Vector bitwise AND, SSE two-operand form (dst is both input and output).
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Vector bitwise AND, AVX three-operand form.
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector bitwise AND with the second operand folded from memory
// (predicate limits this to >8-byte vectors).
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21886
21887 // --------------------------------- OR ---------------------------------------
21888
// Vector bitwise OR, SSE two-operand form (dst is both input and output).
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Vector bitwise OR, AVX three-operand form.
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector bitwise OR with the second operand folded from memory
// (predicate limits this to >8-byte vectors).
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21921
21922 // --------------------------------- XOR --------------------------------------
21923
// Vector bitwise XOR, SSE two-operand form (dst is both input and output).
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Vector bitwise XOR, AVX three-operand form.
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Vector bitwise XOR with the second operand folded from memory
// (predicate limits this to >8-byte vectors).
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21956
21957 // --------------------------------- VectorCast --------------------------------------
21958
// Cast a byte vector to a wider element type. The generic vec variant is
// usable except for byte->double without AVX512VL, which needs legVec
// registers and is handled by vcastBtoD below.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Byte->double cast without AVX512VL: restricted to legVec register class.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21981
// Short -> byte narrowing cast for source vectors of at most 8 elements,
// on targets without the AVX512VLBW down-convert instructions.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // Clear the high byte of each short lane, then pack word lanes down to
    // bytes; after the mask, the unsigned-saturating pack cannot saturate.
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
21996
// Short -> byte narrowing cast for a 16-element (256-bit) source, on targets
// without the AVX512VLBW down-convert instructions. Needs a temp because
// vpackuswb packs within 128-bit lanes only.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // Mask each short lane to its low byte, split the upper 128 bits into
    // vtmp, then pack both halves into the low 128 bits of dst.
    int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
22014
// Short -> {byte,int,long,float,double} cast using EVEX / sign-extending
// forms. Selected when AVX512VLBW is available, or when the cast widens
// (dst >= src) so no down-convert instruction is needed.
instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        // NOTE(review): this adjustment writes vlen_enc but evpmovwb below
        // takes src_vlen_enc, so it appears to have no effect; the T_BYTE
        // (narrowing) arm also seems reachable only with avx512vlbw per the
        // predicate, which implies VL — confirm against upstream intent.
        if (!VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        // Widen to int first, then convert int -> float.
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        // vcvtdq2pd doubles the element width, so the intermediate int
        // vector occupies only half the destination vector width.
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22053
// Int -> {byte,short} narrowing cast for sources up to 128 bits, on pre-AVX512
// targets (no evpmovd* down-converts available).
instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    // Mask each int lane down to the target width first so the
    // unsigned-saturating packs below cannot saturate.
    if (to_elem_bt == T_BYTE) {
      // int -> short -> byte via two pack steps.
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22078
// Int -> {byte,short} narrowing cast for a 256-bit source on pre-AVX512
// targets. Needs a temp because the pack instructions work within 128-bit
// lanes, so the upper half is extracted and packed against the lower half.
instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      // Mask lanes to byte range, fold upper 128 bits onto the lower ones,
      // then pack int -> short -> byte.
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      // Same shape as above but only one pack step (int -> short).
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22106
// Int -> any-type cast using AVX-512 down-converts or widening converts.
// Selected on AVX-512 targets, or whenever the cast does not narrow.
instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    switch (dst_elem_bt) {
      case T_BYTE:
        // Without AVX512VL the down-convert only exists at 512-bit width;
        // upper source bits are zero in that case.
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_SHORT:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
22146
// Long -> {byte,short} narrowing cast on pre-AVX512 targets: shuffle the low
// dword of each long into a compact run, mask to the target width, then pack.
instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen = Matcher::vector_length_in_bytes(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());
    if (vlen <= 16) {
      // Shuffle imm 8 = 0b00_00_10_00: moves dwords {0,2} (the low halves of
      // both longs) into the low two dword lanes.
      __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(vlen <= 32, "required");
      // 256-bit source: compact the low dwords within each lane, then pull
      // the two lanes' results together with a cross-lane qword permute.
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
    if (to_elem_bt == T_BYTE) {
      // Extra pack step takes the shorts down to bytes.
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
22176
// Long -> any-type cast on AVX-512 targets, plus the int/float/double
// destinations that pre-AVX512 hardware can still handle here.
instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this, $src);
    int vlen_enc = vector_length_encoding(this, $src);
    switch (to_elem_bt) {
      case T_BYTE:
        // Down-converts need AVX512VL for sub-512-bit widths; otherwise use
        // the 512-bit form (upper source bits are zero).
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_SHORT:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_INT:
        if (vlen == 8) {
          // Single long -> single int: just move the low 32 bits if needed.
          if ($dst$$XMMRegister != $src$$XMMRegister) {
            __ movflt($dst$$XMMRegister, $src$$XMMRegister);
          }
        } else if (vlen == 16) {
          // Shuffle imm 8 compacts dwords {0,2} into the low lanes.
          __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
        } else if (vlen == 32) {
          if (UseAVX > 2) {
            if (!VM_Version::supports_avx512vl()) {
              vlen_enc = Assembler::AVX_512bit;
            }
            __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
          } else {
            // No AVX-512: in-lane compact then cross-lane qword permute.
            __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
            __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
          }
        } else { // vlen == 64
          __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        }
        break;
      case T_FLOAT:
        // Long->FP conversions require AVX512DQ.
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22236
// Float -> double widening cast; a single convert instruction suffices.
instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22247
22248
// Float -> {byte,short,int} cast on AVX targets without AVX512VL/AVX10.2.
// The macro-assembler helper also fixes up NaN/out-of-range inputs to match
// Java semantics, which is why several temporaries and flags are clobbered.
instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the explicit scratch register previously needed to
    // load addresses above 32 bits for register-indirect addressing: stub
    // constants live in the code cache, and ReservedCodeCacheSize is capped
    // at 2G. Targets are free to raise that limit, but a code cache beyond
    // 2G is unrealistic in practice, and with the cap in place we save a
    // temporary register, which can prevent spilling in blocks with high
    // register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22274
// Float -> integral cast on AVX-512 targets (without AVX10.2). Uses opmask
// registers for the NaN/out-of-range fix-up required by Java semantics.
instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      // Widening to long: size by the (wider) destination vector.
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      // Same-size or narrowing: size by the source vector.
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22298
// Float -> integral cast on AVX10.2 targets, where saturating-convert
// instructions make the explicit NaN/range fix-up (and its temps) unnecessary.
instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // Long destinations widen, so size by dst; otherwise size by src.
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22311
// Memory-operand variant of the AVX10.2 float -> integral cast: the float
// vector is converted straight from memory, fusing the load.
instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X (LoadVector src)));
  format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // The source is in memory, so its encoding is derived from the element
    // count times sizeof(float) rather than from a source node.
    int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
    __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22325
// Double -> float narrowing cast; a single convert instruction suffices.
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    // Encoding sized by the (wider) source vector.
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22336
// Double -> integral cast on AVX targets without AVX512VL/AVX10.2. The helper
// performs the NaN/out-of-range fix-up required by Java semantics, hence the
// five vector temporaries and the flags kill.
instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            !VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22354
// Double -> integral cast on AVX-512 targets (without AVX10.2), using opmask
// registers for the NaN/out-of-range fix-up.
instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // With AVX512DQ the conversion goes via 64-bit lanes, so the sign-flip
    // constant must match that lane width; otherwise the float one is used.
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
                              ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22372
// Double -> integral cast on AVX10.2 targets; the saturating-convert
// instructions make the explicit fix-up path (and its temps) unnecessary.
instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    // Encoding sized by the (wider) source vector.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22385
// Memory-operand variant of the AVX10.2 double -> integral cast: converts
// straight from memory, fusing the load.
instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
  predicate(VM_Version::supports_avx10_2() &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X (LoadVector src)));
  format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    // The source is in memory, so its encoding is derived from the element
    // count times sizeof(double) rather than from a source node.
    int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22399
// Unsigned (zero-extending) widening cast from byte/short/int sources; the
// macro-assembler helper picks the concrete zero-extend by (from, to) types.
instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
22415
// Math.round() on a float vector (result T_INT) for AVX targets without
// AVX512VL. Temporarily switches MXCSR rounding mode, hence the constant
// table entry and the flags kill.
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR image used while rounding; E-core tuning additionally masks all
    // FP exceptions (0x3FBF vs 0x3F80).
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22432
// Math.round() on a float vector (result T_INT) for AVX-512 targets; uses
// opmask temporaries instead of the extra vector temps of the AVX path.
instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR image used while rounding; E-core tuning additionally masks all
    // FP exceptions (0x3FBF vs 0x3F80).
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22449
// Math.round() on a double vector (result T_LONG); EVEX-only path.
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR image used while rounding; E-core tuning additionally masks all
    // FP exceptions (0x3FBF vs 0x3F80).
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
22464
22465 // --------------------------------- VectorMaskCmp --------------------------------------
22466
// Float/double vector compare producing a vector mask (all-ones / all-zeros
// per lane) for 64..256-bit vectors; legacy-encodable registers only.
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    // Map the ideal BoolTest condition to the vcmpps/vcmppd immediate.
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22485
// Float/double compare of 512-bit vectors when the consumer wants a vector
// mask rather than an opmask: compare into a temporary kReg, then expand it
// into an all-ones/all-zeros vector via a masked load of the all-bits-set
// constant.
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      // merge=false: lanes whose mask bit is clear are zeroed.
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
22507
// Float/double compare producing an AVX-512 opmask (kReg) result directly.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22526
// Signed integral vector compare for the conditions x86 pcmpeq/pcmpgt can
// express directly (eq, lt, gt) — no negation of the result is needed, so no
// temporary is required (xnoreg is passed).
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22546
// Signed integral vector compare for conditions (ne, le, ge) that must be
// synthesized by negating a direct compare; the extra temporary holds the
// all-ones vector used for the negation inside vpcmpCCW.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22567
// Unsigned integral vector compare on pre-AVX512 hardware: x86 only has
// signed vector compares, so both operands have each lane's sign bit flipped
// first, which turns an unsigned comparison into the equivalent signed one.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    // 64-bit constant with the sign bit of each element set, replicated
    // across the vector below.
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    // Broadcast the 64-bit flip pattern: vmovddup for 128-bit vectors,
    // vbroadcastsd for wider ones.
    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    // Flip sign bits of both operands, then do the signed compare.
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22594
// Integral compare of 512-bit vectors when the consumer wants a vector mask:
// compare into a temporary opmask, then expand it into an all-ones/all-zeros
// vector via a masked load of the all-bits-set constant.
instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    // evpcmpd/evpcmpq take a signedness flag instead of separate opcodes.
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;  // Zero (do not merge) lanes whose mask bit is clear.
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22628
22629
// Integral vector compare producing an AVX-512 opmask (kReg) result directly;
// dispatches on element type to the matching evpcmp instruction.
instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    // evpcmp* take a signedness flag instead of separate opcodes.
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is unmasked (k0), writing the per-lane result into dst.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
22667
22668 // Extract
22669
// Extract an int/short/byte lane from a vector of at most 16 bytes
// (single 128-bit register) into a general-purpose register.
instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22684
// Extract an int/short/byte lane from a 256/512-bit vector: first isolate the
// 128-bit lane holding the element into $vtmp, then extract from that lane.
// NOTE(review): idx is typed immI here while the sibling extract rules use
// immU8 — presumably equivalent for valid indices; confirm against matcher.
instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractB src idx));
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22702
// Extract a long lane from a 2-element (128-bit) vector into a GPR.
// Requires SSE4 for the underlying pextrq-style extraction.
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22715
// Extract a long lane from a 4- or 8-element (256/512-bit) vector:
// isolate the containing 128-bit lane into $vtmp, then extract from it.
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22730
// Extract a float lane from a vector of at most 4 elements into an XMM
// destination; $vtmp is scratch for the in-register shuffle done by get_elem.
instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22743
// Extract a float lane from an 8- or 16-element (256/512-bit) vector:
// isolate the containing 128-bit lane into $vtmp, then extract from it.
instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22758
// Extract a double lane from a 2-element (128-bit) vector into an XMM
// destination register.
instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22770
// Extract a double lane from a 4- or 8-element (256/512-bit) vector:
// isolate the containing 128-bit lane into $vtmp, then extract from it.
instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
22785
22786 // --------------------------------- Vector Blend --------------------------------------
22787
// Pre-AVX vector blend.  SSE4.1 pblendvb takes its mask implicitly in xmm0,
// so the mask is first copied into $tmp, which is pinned to xmm0 (rxmm0).
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Skip the copy if the register allocator already placed the mask in xmm0.
    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
22803
// AVX blend for integral-typed vectors up to 32 bytes using vpblendvb
// (byte-granular, mask supplied explicitly).  Disabled on E-core-tuned
// builds, which use the and/andn/or sequence in vblendvp instead.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22817
// AVX blend for floating-point-typed vectors up to 32 bytes using vblendvps
// (selects per 32-bit element based on the mask's sign bits).
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22831
// AVX blend for E-core-tuned builds: composed from bitwise ops as
// dst = (mask & src2) | (~mask & src1), avoiding the variable-blend
// instructions.  Assumes the mask lanes are all-ones / all-zeros.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22847
// 512-bit blend with a vector-shaped (non-kReg) mask: convert the mask
// vector to an opmask by comparing it against all-bits-set, then perform
// a merge-masked blend.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
     int vlen_enc = Assembler::AVX_512bit;
     BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22862
22863
// Blend whose mask is already an opmask register (vectmask type).  Subword
// element types additionally require AVX512BW for the byte/word blend forms.
instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22877
22878 // --------------------------------- ABS --------------------------------------
22879 // a = |a|
// Absolute value of packed bytes; SSE form for <=16 lanes, VEX/EVEX form
// otherwise.
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22894
// Absolute value of packed shorts; SSE form for <=8 lanes (one 128-bit
// register), VEX/EVEX form otherwise.
instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22909
// Absolute value of packed ints; SSE form for <=4 lanes (one 128-bit
// register), VEX/EVEX form otherwise.
instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22924
// Absolute value of packed longs; evpabsq is AVX-512 only.  Without
// AVX512VL the sub-512-bit encodings are unavailable, so the full
// 512-bit encoding is forced (upper lanes are don't-care).
instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
22938
22939 // --------------------------------- ABSNEG --------------------------------------
22940
// Packed-float abs/neg via a sign-bit mask; the opcode distinguishes AbsVF
// from NegVF.  The 4-lane case is handled by the 1-operand rule vabsneg4F.
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22960
// 4-lane packed-float abs/neg performed in place (dst is both input and
// output), matching the 2-operand SSE form.
instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
22973
// Packed-double abs/neg via a sign-bit mask; the opcode distinguishes
// AbsVD from NegVD.  The 2-lane case uses the 128-bit form.
instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD  src));
  match(Set dst (NegVD  src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
22990
22991 //------------------------------------- VectorTest --------------------------------------------
22992
// VectorTest for vectors smaller than 16 bytes: a scratch XMM register is
// needed so vectortest() can widen/mask the partial vector before testing.
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}
23005
// VectorTest for vectors of 16 bytes or more: full-register test, no
// scratch register needed (xnoreg passed as the temp).
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16  $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}
23017
// "All-true" test of a short opmask (<8 lanes, or 8 lanes without AVX512DQ's
// kortestb): copy the mask to a GPR, keep only the live lane bits, and
// compare against the all-ones pattern so ZF reflects all-true.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);   // discard bits beyond the mask length
    __ cmpl($tmp$$Register, (1 << masklen) - 1);   // equal iff every live bit is set
  %}
  ins_pipe( pipe_slow );
%}
23033
// "Any-true" test of a short opmask (<8 lanes, or 8 lanes without AVX512DQ):
// copy the mask to a GPR and AND with the live-lane bits; the AND sets ZF
// iff no lane is true.
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);   // flags reflect only the live lanes
  %}
  ins_pipe( pipe_slow );
%}
23048
// Opmask test for masks of >=16 lanes (or 8 lanes when AVX512DQ provides
// kortestb): kortest sets ZF if the OR is zero and CF if it is all ones,
// covering both any-true and all-true predicates in one instruction.
// NOTE(review): the encoding ORs $src1 with itself and never reads $src2 —
// presumably $src2 is guaranteed equal to $src1 (or redundant) by the
// matcher for this pattern; confirm against the VectorTest lowering.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8  $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
23060
23061 //------------------------------------- LoadMask --------------------------------------------
23062
// VectorLoadMask producing a vector-shaped mask (no vectmask type) on
// hardware without AVX512VL+BW; expands byte mask values into per-element
// lane masks.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
23075
// VectorLoadMask into an opmask register on hardware without AVX512VL+BW;
// always uses the full 512-bit encoding with $xtmp as an XMM scratch.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23087
// VectorLoadMask into an opmask register on AVX512VL+BW hardware; uses the
// natural vector-length encoding of the source.
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23100
23101 //------------------------------------- StoreMask --------------------------------------------
23102
// VectorStoreMask for 1-byte elements: converts lane masks (0/-1 per byte)
// to 0/1 bytes via a byte absolute value.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23120
// VectorStoreMask for 2-byte elements: narrows 16-bit lane masks to bytes
// (pack), then converts 0/-1 to 0/1 with a byte absolute value.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      // pabsw turns -1 lanes into +1; packuswb narrows against a zero vector.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // Fold the upper 128-bit lane down, pack both halves, then abs.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23143
// VectorStoreMask for 4-byte elements (pre-AVX512): narrows 32-bit lane
// masks to bytes through two pack steps, producing 0/1 bytes.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      // abs(-1) = 1, then dword -> word -> byte narrowing against zero.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // Fold the upper 128-bit lane down, then narrow twice and abs.
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
23169
// VectorStoreMask for 8-byte elements, 2 lanes (pre-AVX512): compress the
// two qword masks into dwords with pshufd, then narrow to 0/1 bytes.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);  // pick low dword of each qword
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23185
// VectorStoreMask for 8-byte elements, 4 lanes (AVX, pre-AVX512): gather the
// low dword of each qword across both 128-bit lanes, then narrow to 0/1
// bytes.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    // 0x88 selects the even (low) dword of every qword in each lane.
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23203
// AVX-512 VectorStoreMask for 4-byte elements with a vector-shaped mask:
// evpmovdb truncates dwords to bytes, then vpabsb maps -1 to 1.  Without
// AVX512VL the source must use the 512-bit encoding.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23219
// AVX-512 VectorStoreMask for 8-byte elements with a vector-shaped mask:
// evpmovqb truncates qwords to bytes, then vpabsb maps -1 to 1.  Without
// AVX512VL the source must use the 512-bit encoding.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23235
// VectorStoreMask from an opmask register without AVX512VL+BW: expand the
// opmask to a dword vector via a masked load of the cmp-bits constant,
// then truncate the dwords to bytes.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
23249
// VectorStoreMask from an opmask register with AVX512VL+BW: evpmovm2b
// expands mask bits to -1/0 bytes, then vpabsb maps -1 to 1.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23262
// Opmask-to-opmask cast: a pure type change, so no code is emitted
// (dst aliases the input; cost 0).
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
23272
// Vector-mask cast between types of identical byte length: a pure type
// change, so no code is emitted (dst aliases the input; cost 0).
instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}
23283
// Vector-mask cast between types of different byte length: the mask lanes
// must actually be widened/narrowed, delegated to the macro assembler.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}
23296
23297 //-------------------------------- Load Iota Indices ----------------------------------
23298
// Load the constant iota vector (0, 1, 2, ...) from the stub constant area.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}
23309
// PopulateIndex with an int start value: dst = iota + broadcast(src1).
// Only a stride (src2) of 1 is supported, enforced by the assert.
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
     assert($src2$$constant == 1, "required");
     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
     int vlen_enc = vector_length_encoding(this);
     BasicType elem_bt = Matcher::vector_element_basic_type(this);
     __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
     __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23325
// PopulateIndex with a long start value: dst = iota + broadcast(src1).
// Only a stride (src2) of 1 is supported, enforced by the assert.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
     assert($src2$$constant == 1, "required");
     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
     int vlen_enc = vector_length_encoding(this);
     BasicType elem_bt = Matcher::vector_element_basic_type(this);
     __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
     __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
     __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23341
23342 //-------------------------------- Rearrange ----------------------------------
23343
23344 // LoadShuffle/Rearrange for Byte
// Byte rearrange for vectors under 32 lanes: a single SSSE3/SSE4 pshufb
// suffices because all indices stay within one 128-bit lane.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23356
// 32-lane byte rearrange without AVX512_VBMI: vpshufb only shuffles within
// each 128-bit lane, so shuffle both the original and a lane-swapped copy
// of the source, then blend based on which lane each index refers to.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23378
23379
// Byte rearrange above 32 lanes without AVX512_VBMI: delegated to the
// macro-assembler helper, which needs three XMM temps, an opmask temp and
// a GPR temp for the cross-lane emulation.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23394
// Byte VectorRearrange when AVX512_VBMI is available: a single vpermb performs
// the full cross-lane byte permutation directly.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23406
23407 // LoadShuffle/Rearrange for Short
23408
// VectorLoadShuffle for shorts without AVX512BW: expand each 16-bit element
// index into a pair of byte indices (2*i, 2*i+1) so a byte shuffle (pshufb)
// can later implement the short rearrange.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      // SSE path, limited to 128-bit vectors.
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      // (vector_short_shufflemask supplies the per-byte 0/1 increments)
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      // AVX path; 256-bit vectors require AVX2 for the integer shifts below.
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
23449
// Short VectorRearrange, in-place SSE form (<= 8 elements, no AVX512BW).
// The shuffle operand is the byte-index mask prepared by loadShuffleS, so a
// plain byte shuffle implements the short rearrange.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23461
// Short VectorRearrange for 16-element (256-bit) vectors without AVX512BW.
// Same lane-swap + double-shuffle + blend technique as rearrangeB_avx, driven
// by the byte-index mask produced by loadShuffleS.
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
23483
// Short VectorRearrange with AVX512BW: vpermw does the full word permutation.
// Without AVX512VL the sub-512-bit encodings of vpermw are unavailable, so the
// operation is widened to 512 bits (upper lanes are don't-care).
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23498
23499 // LoadShuffle/Rearrange for Integer and Float
23500
// VectorLoadShuffle for 4 ints/floats on pure SSE: expand each 32-bit element
// index into four byte indices (4*i .. 4*i+3) so pshufb can implement the
// int/float rearrange.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    // (0xA0 replicates the low word of each dword pair into both words)
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    // (vector_int_shufflemask supplies the per-byte offsets within each dword)
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23530
// Int/float VectorRearrange, in-place SSE form. Uses the byte-index mask
// produced by loadShuffleI with a single byte shuffle.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23542
// Int/float VectorRearrange on AVX: delegates to a macro assembler helper that
// picks the appropriate permute sequence for the element type and vector size.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23555
23556 // LoadShuffle/Rearrange for Long and Double
23557
// VectorLoadShuffle for longs/doubles without AVX512VL: expand each 64-bit
// element index into a pair of dword indices (2*i, 2*i+1) so the rearrange can
// be done with the dword permute vpermd (no sub-512 qword permute available).
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    // (vector_long_shufflemask supplies the 0/1 per-dword increments)
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
23583
// Long/double VectorRearrange without AVX512VL: uses the dword-index mask
// prepared by loadShuffleL with the cross-lane dword permute vpermd.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23597
// Long/double VectorRearrange with EVEX qword permute (vpermq). The variable
// form of vpermq has no 128-bit encoding, so 128-bit operands are widened to
// 256 bits (upper lane is don't-care).
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23614
23615 // --------------------------------- FMA --------------------------------------
23616 // a * b + c
23617
// Fused multiply-add for packed floats, register form: c = a * b + c.
// Note c is both input and result (matched as the destination of FmaVF).
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23629
// Fused multiply-add for packed floats with the multiplier loaded directly
// from memory: c = a * mem + c. Restricted to vectors wider than 8 bytes.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23642
// Fused multiply-add for packed doubles, register form: c = a * b + c.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23654
// Fused multiply-add for packed doubles with a memory multiplier:
// c = a * mem + c. Restricted to vectors wider than 8 bytes.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23667
23668 // --------------------------------- Vector Multiply Add --------------------------------------
23669
// Multiply adjacent short pairs and add into ints (pmaddwd), SSE in-place form.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23679
// Multiply adjacent short pairs and add into ints (vpmaddwd), three-operand
// AVX form.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23690
23691 // --------------------------------- Vector Multiply Add Add ----------------------------------
23692
// Fused multiply-add-accumulate on AVX512_VNNI: matches the AddVI-of-MulAddVS2VI
// pattern and emits a single evpdpwssd (dst += src1 * src2, pairwise shorts to
// ints). Low ins_cost makes it preferred over the separate two-node sequence.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
23705
23706 // --------------------------------- PopCount --------------------------------------
23707
23708 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23709 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23710 match(Set dst (PopCountVI src));
23711 match(Set dst (PopCountVL src));
23712 format %{ "vector_popcount_integral $dst, $src" %}
23713 ins_encode %{
23714 int opcode = this->ideal_Opcode();
23715 int vlen_enc = vector_length_encoding(this, $src);
23716 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23717 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23718 %}
23719 ins_pipe( pipe_slow );
23720 %}
23721
// Masked population count of int/long vector elements. dst is first seeded
// with src so that lanes cleared in the mask keep the src value when the
// merge-masked popcount writes only the enabled lanes.
instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23735
23736 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23737 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23738 match(Set dst (PopCountVI src));
23739 match(Set dst (PopCountVL src));
23740 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23741 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23742 ins_encode %{
23743 int opcode = this->ideal_Opcode();
23744 int vlen_enc = vector_length_encoding(this, $src);
23745 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23746 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23747 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23748 %}
23749 ins_pipe( pipe_slow );
23750 %}
23751
23752 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23753
// CountTrailingZerosV for non-subword elements on EVEX targets (predicate
// shares the lzcnt feature check). Only one vector temp is needed; the unused
// helper slots are passed as xnoreg and the unmasked kmask as k0.
instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23769
// CountTrailingZerosV for short elements on AVX512CD targets (full 512-bit
// vectors, or any size with AVX512VL). Needs three vector temps; one helper
// slot stays xnoreg and the unmasked kmask is k0.
instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23786
// CountTrailingZerosV for byte elements on AVX512VL+BW targets. The byte path
// uses all four vector temps plus a kmask temp and a GPR.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23802
// CountTrailingZerosV fallback for pre-AVX512VL targets on sub-512-bit
// vectors: AVX emulation with three vector temps and a GPR temp.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23816
23817
23818 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23819
// Three-input bitwise logic (MacroLogicV) via vpternlogd: the immU8 func
// operand is the 8-bit truth table selecting the boolean function of
// (dst, src2, src3). dst is both an input and the result.
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23830
// Three-input bitwise logic with the third operand loaded from memory.
// Restricted to vectors wider than 8 bytes.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23842
23843 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate left/right by an 8-bit immediate shift count. The ideal
// opcode distinguishes left from right inside the shared helper.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23856
// Vector rotate left/right by a per-element variable shift vector. The ideal
// opcode distinguishes left from right inside the shared helper.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23869
23870 // ---------------------------------- Masked Operations ------------------------------------
// Masked vector load on AVX where the mask is a vector (not a kmask):
// matched only when the mask input's type is not a vectmask.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23882
23883
// Masked vector load on EVEX targets where the mask is a kmask register.
// The 'false' argument selects the load direction in evmovdqu (reg <- mem).
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
23895
// Masked vector store on AVX with a vector mask. The element type and vector
// length are taken from the src node (this node's type is the memory state).
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23908
// Masked vector store on EVEX targets with a kmask register. The 'true'
// argument selects the store direction in evmovdqu (mem <- reg).
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23921
// Debug check: stop the VM if any of the mask bits of a vector address are
// set (i.e. the access is misaligned). testq clobbers flags, hence KILL cr.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
23936
// VectorCmpMasked: compare src1 and src2 lane-wise under a kmask and produce
// in dst either -1 (all masked lanes equal) or the index of the first
// mismatching lane.
// NOTE(review): the final tzcnt scans ~ktmp1, whose bits are also set for
// lanes outside the mask — this presumably relies on the mask being a prefix
// mask (as produced for partial array compares); confirm with callers.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // ktmp2 = lanes NOT covered by the mask
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    // Optimistically assume all masked lanes are equal
    __ mov64($dst$$Register, -1L);
    // ktmp1 = lanes under the mask where src1 == src2
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    // Carry is set iff (unmasked | equal) covers every lane, i.e. no mismatch
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    // Otherwise locate the first lane whose equal-bit is clear
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}
23961
23962
// VectorMaskGen with a runtime length: builds a kmask with the low 'len' bits
// set via the genmask macro assembler helper (clobbers flags).
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
23972
// VectorMaskGen with a compile-time constant length: materialize the low-bits
// mask in a GPR and move it into the kmask register.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    // NOTE(review): assumes 1 <= $len <= 64 — a zero length would shift by 64
    // (undefined in C++). Presumably guaranteed by the ideal graph; confirm.
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
23983
// VectorMaskToLong when the mask is a true kmask: pack the mask bits into a
// long via the shared vector_mask_operation helper (dispatched on opcode).
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    // dst doubles as the scratch register for the helper
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24000
// VectorMaskToLong when the mask is held as a boolean vector (no kmask):
// uses the XMM-based vector_mask_operation overload with one vector temp.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24016
// VectorMaskToLong fused with a VectorStoreMask feeding it: operate directly
// on the pre-store mask vector instead of the stored representation.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24032
// VectorMaskTrueCount (popcount of set mask lanes) for a true kmask.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24049
// VectorMaskTrueCount when the mask is held as a boolean vector (no kmask).
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24065
// VectorMaskTrueCount fused with a VectorStoreMask feeding it: count directly
// on the pre-store mask vector.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24081
// Index of the first/last set lane of a kReg-backed (EVEX opmask) vector mask.
// One rule serves both ideal ops; ideal_Opcode() discriminates in the encoder.
// mask_size (bytes) is passed alongside mask_len for the macro-assembler helper.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24099
// First/last set-lane index when the mask lives in an XMM boolean vector
// (mask input is not a vectmask). Uses a GPR and an XMM temp; kills flags.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24116
// First/last set-lane index matched through an intervening VectorStoreMask
// node (AVX path); operates directly on the underlying vector, skipping the
// materialized boolean store form.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24133
24134 // --------------------------------- Compress/Expand Operations ---------------------------
// AVX2 fallback for CompressV/ExpandV when AVX512VL is unavailable, limited to
// vectors of at most 32 bytes. The emulation needs a permutation vector plus
// XMM/GPR scratch registers; ideal_Opcode() selects compress vs expand.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24150
// Native EVEX compress/expand: requires AVX512VL, or full 64-byte vectors where
// VL is implicit. The mask is a kReg opmask; no temporaries are needed.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // 'false' flag mirrors the other vector_compress_expand call sites
    // (presumably a merge/variant selector -- see macroAssembler definition).
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
24164
// CompressM: compress the bits of an opmask register itself. The assert
// guarantees we only ever see kReg-backed vectmask inputs here.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
24176
24177 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24178
// Bit-reversal of each vector element without GFNI support; emulated with two
// XMM temps and a GPR scratch register.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24192
// Bit-reversal via GFNI: a single affine transform against the constant
// bit-matrix 0x8040201008040201 (identity matrix, used to flip bit order).
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Constant-table address of the 64-bit GF2 matrix operand.
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24207
// Byte-reversal of each element. Applies when AVX512BW is present, or the
// vector is narrower than 64 bytes (the 64-byte no-BW case is handled below).
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24220
// Byte-reversal for 64-byte vectors on targets without AVX512BW: the byte
// shuffle must be emulated, requiring two XMM temps and a GPR scratch.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24234
24235 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24236
// CountLeadingZerosV for int/long lanes on EVEX targets (gated by the helper
// predicate on element type and length). No temps needed: xnoreg/k0/noreg
// placeholders tell the helper the direct VPLZCNT path applies.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24250
// Masked CountLeadingZerosV for int/long lanes on EVEX targets. dst is first
// seeded with src so unselected lanes keep their source values before the
// opmask-predicated count overwrites the selected lanes.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24265
// CountLeadingZerosV for short lanes: AVX512CD provides lzcnt only for
// int/long, so the short case widens/narrows through two XMM temps.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24281
// CountLeadingZerosV for byte lanes on AVX512VLBW: the most temp-hungry case,
// needing three XMM temps, an opmask temp and a pointer-sized GPR scratch.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24296
// AVX (non-VL) CountLeadingZerosV for int lanes on sub-64-byte vectors.
// The int path needs no GPR scratch, hence noreg (contrast the rule below).
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24311
// AVX (non-VL) CountLeadingZerosV for every non-int element type on
// sub-64-byte vectors; unlike the int rule this path also needs a GPR scratch.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24326
24327 // ---------------------------------- Vector Masked Operations ------------------------------------
24328
// Masked vector add, reg-reg form, for all lane types. dst doubles as first
// source (two-operand match shape); the kReg mask selects which lanes are
// updated. The boolean 'true' passed to evmasked_op matches every other
// masked arithmetic rule in this section (presumably merge semantics --
// unselected lanes keep dst; confirm against evmasked_op's declaration).
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24346
// Masked vector add with the second operand folded from memory (LoadVector).
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24364
// Masked vector xor, reg-reg form.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24377
// Masked vector xor with memory operand folded via LoadVector.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24390
// Masked vector or, reg-reg form.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24403
// Masked vector or with memory operand folded via LoadVector.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24416
// Masked vector and, reg-reg form.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24429
// Masked vector and with memory operand folded via LoadVector.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24442
// Masked vector subtract, reg-reg form, all lane types.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24460
// Masked vector subtract with memory operand folded via LoadVector.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24478
// Masked vector multiply, reg-reg form. Note: no MulVB match -- byte multiply
// is not provided here (consistent with the mem form below).
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24495
// Masked vector multiply with memory operand folded via LoadVector.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24512
// Masked vector sqrt (unary, float/double): dst is both input and output,
// so it is passed for every vector-register slot of evmasked_op.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24526
// Masked vector divide (float/double only), reg-reg form.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24540
// Masked vector divide (float/double) with memory operand folded via LoadVector.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24554
24555
// Masked rotate by immediate; one rule covers both rotate directions, with
// ideal_Opcode() distinguishing RotateLeftV from RotateRightV in the encoder.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24569
// Masked rotate with per-lane rotate counts in a vector register; covers both
// directions via ideal_Opcode().
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24583
// Masked left shift by an immediate count (wrapped in LShiftCntV by the
// ideal graph); short/int/long lanes.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24598
// Masked left shift with a uniform (non-variable) shift count held in a
// vector register; the trailing 'false' tells evmasked_op this is not the
// per-lane variable-shift form (see is_var_shift predicate).
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24614
// Masked left shift with per-lane variable shift counts; trailing 'true'
// selects the variable-shift form in evmasked_op.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24630
// Masked arithmetic right shift by an immediate count (RShiftCntV-wrapped).
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24645
// Masked arithmetic right shift, uniform (non-variable) count in a vector
// register; trailing 'false' = not the variable-shift form.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24661
// Masked arithmetic right shift with per-lane variable counts; trailing 'true'
// selects the variable-shift form.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24677
// Masked logical (unsigned) right shift by an immediate count. Note the count
// comes wrapped in RShiftCntV, same as the signed form.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24692
// Masked logical right shift, uniform (non-variable) count; trailing 'false'
// = not the variable-shift form.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24708
// Masked logical right shift with per-lane variable counts; trailing 'true'
// selects the variable-shift form.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24724
// Masked vector max, reg-reg form.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24737
// Masked vector max with memory operand folded via LoadVector.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24750
// Masked vector min, reg-reg form.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24763
// Masked vector min, register-memory form: second operand is loaded from
// memory and folded into the instruction.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24776
// Masked vector rearrange (permute): dst lanes selected by $mask are
// permuted using the shuffle indices in $src2.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    // NOTE: 'false' flag here differs from the other masked ops above —
    // presumably because rearrange does not merge with the old dst value;
    // confirm against the evmasked_op declaration.
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24789
// Masked vector absolute value for all integral element widths
// (byte/short/int/long); unary, so $dst is both source and destination.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    // Unary op routed through the binary-shaped helper: dst is passed for
    // every operand slot.
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24805
// Masked fused multiply-add for float/double vectors, register operands:
// on lanes selected by $mask, dst = dst * src2 + src3.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24820
// Masked fused multiply-add with the third operand loaded from memory:
// on lanes selected by $mask, dst = dst * src2 + mem[src3].
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24835
// Masked vector compare producing an opmask register: dst[i] is set when
// lane i of $mask is set AND (src1[i] cond src2[i]) holds. Integral types
// select signed vs. unsigned compare from the boolean-test predicate;
// FP types map the predicate onto an EVEX FP comparison immediate.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)))
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch the comparison on the element type of the first operand.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24885
// Broadcast an int condition into an opmask (MaskAll) for vectors of at
// most 32 lanes.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
24896
// Mask NOT via XOR with an all-ones mask (MaskAll -1) for mask lengths
// below 8; needs scratch registers to confine the complement to the live
// mask bits. Requires AVX512DQ for the sub-byte kreg operations.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24908
// Mask NOT via XOR with an all-ones mask (MaskAll -1) for mask lengths
// where a direct KNOT of the right width exists: 8 (with AVX512DQ), 16,
// or >16 (with AVX512BW). No scratch registers needed.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24921
// Convert a long bit-mask into a boolean vector (non-kreg mask
// representation) for mask lengths <= 8, AVX path.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    // NOTE(review): $xtmp is declared as a TEMP but xnoreg is passed here —
    // presumably the helper needs no XMM scratch when mask_len <= 8 (the
    // GT8 variant below does pass its xtmp); confirm the TEMP is required.
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24935
24936
// Convert a long bit-mask into a boolean vector for mask lengths in
// (8, 32], AVX path; needs an extra XMM scratch and clobbers flags.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24951
// Convert a long bit-mask directly into an opmask register (EVEX true-mask
// representation): a single kmov from the GPR.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
24961
// Bitwise AND/OR/XOR of two opmask registers.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    // Sanity check: both mask inputs must have the same vector mask type.
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    // Without AVX512DQ there is no byte-granular kreg op, so widen
    // sub-16-bit mask operations to 16 bits.
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24978
// Masked ternary logic (VPTERNLOG) with register operands: $func is the
// 8-bit truth-table immediate applied to (dst, src2, src3) per bit.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24990
// Masked ternary logic (VPTERNLOG) with the third operand loaded from
// memory; same truth-table semantics as the register form above.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25002
// Type-only cast of a vector mask held in an opmask register; emits no
// code (size 0, empty encoding).
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25013
// Type-only cast of a vector value; emits no code (size 0, empty encoding).
instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25024
// Type-only cast of a vector constrained to legacy (non-extended) vector
// registers; emits no code.
instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}
25035
// IsInfinite check for a scalar float using VFPCLASSSS; the resulting
// 1-bit mask is moved into an integer register.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    // imm8 0x18 = 0x08 | 0x10: presumably the +inf and -inf class bits of
    // VFPCLASS — verify against the Intel SDM encoding table.
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25047
// IsInfinite check for a scalar double using VFPCLASSSD; same imm8 class
// selector as the float variant above.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
25059
// Signed saturating add/sub for subword (byte/short) element vectors,
// register operands; maps directly onto PADDS/PSUBS-style instructions.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // false => signed saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25075
// Unsigned saturating add/sub for subword (byte/short) element vectors,
// register operands; maps onto PADDUS/PSUBUS-style instructions.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // true => unsigned saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25091
// Signed saturating add/sub for int/long element vectors, EVEX path:
// there is no hardware saturating instruction for these widths, so the
// helper synthesizes it using opmask registers for overflow detection.
// Selected for 512-bit vectors or when AVX512VL covers smaller ones.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25111
// Signed saturating add/sub for int/long element vectors, AVX fallback
// (<= 256-bit vectors without AVX512VL): overflow handling is synthesized
// with XMM temporaries instead of opmask registers.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25130
// Unsigned saturating add for int/long element vectors, EVEX path;
// synthesized with XMM temporaries and one opmask register.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25147
// Unsigned saturating add for int/long element vectors, AVX fallback
// (<= 256-bit vectors without AVX512VL); all scratch state is in XMM.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25164
// Unsigned saturating subtract for int/long element vectors, EVEX path;
// only an opmask scratch is needed (result clamps to zero on underflow).
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25181
// Unsigned saturating subtract for int/long element vectors, AVX fallback
// (<= 256-bit vectors without AVX512VL).
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25198
// Signed saturating add/sub for subword element vectors with the second
// operand folded from memory.
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // false => signed saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25214
// Unsigned saturating add/sub for subword element vectors with the second
// operand folded from memory.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // true => unsigned saturation.
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25230
// Masked signed saturating add/sub for subword element vectors, register
// operand.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Flags: false => signed saturation; the second 'true' matches the
    // merge-style flag passed by the other masked ops in this file.
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25245
// Masked unsigned saturating add/sub for subword element vectors, register
// operand.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // First 'true' => unsigned saturation.
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25260
// Masked signed saturating add/sub for subword element vectors with the
// second operand folded from memory.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25275
// Masked unsigned saturating add/sub for subword element vectors with the
// second operand folded from memory.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25290
// SelectFromTwoVector: picks lanes from src1/src2 as directed by the
// per-lane selector values in $index. The index register doubles as the
// destination, matching the two-table permute instruction family.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25302
// Bitwise move of a 16-bit short from a GPR into an XMM register,
// reinterpreting it as a half-float value (no conversion).
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
25312
// Bitwise move of a half-float from an XMM register into a GPR,
// reinterpreting it as a 16-bit short (no conversion).
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25322
// Fused pattern float->HF conversion followed by reinterpret-to-HF: emits
// a single vcvtps2ph, avoiding the GPR round-trip of the separate nodes.
// imm8 0x04 selects round-to-nearest-even per the instruction's encoding.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25332
// Fused pattern reinterpret-from-HF followed by HF->float conversion:
// emits a single vcvtph2ps, avoiding the GPR round-trip.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25342
// Scalar half-float square root (AVX512-FP16 vsqrtsh).
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25352
// Scalar half-float add/div/mul/sub; the ideal opcode selects the concrete
// FP16 instruction inside the shared helper.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25366
// Scalar half-float min/max using the AVX10.2 VMINMAXSH instruction,
// which handles NaN/signed-zero semantics directly via its imm8 function
// selector — no temporaries needed.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
  %}
  ins_pipe( pipe_slow );
%}
25379
// Scalar half-float min/max fallback for CPUs without AVX10.2: the helper
// synthesizes Java min/max semantics (NaN and -0.0 handling) with an
// opmask and two XMM temporaries.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25394
// Scalar half-float fused multiply-add: dst = dst * src1 + src2, emitted
// with the 132-form so dst can serve as both multiplicand and result.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25405
25406
// Packed half-float square root, register operand.
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25417
// Packed half-float square root with the operand folded from memory; the
// VectorReinterpret in the pattern absorbs the short->HF view change that
// precedes FP16 loads in the ideal graph.
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25428
// Packed half-float add/div/mul/sub, register operands; the ideal opcode
// selects the concrete FP16 instruction inside the shared helper.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25443
25444
// Packed half-float add/div/mul/sub with the second operand folded from
// memory (through the reinterpret-of-load pattern).
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
25459
// Packed half-float fused multiply-add: dst = dst * src1 + src2 (132-form
// so dst serves as both multiplicand and result).
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25470
// Packed half-float fused multiply-add with the multiplier folded from
// memory: dst = dst * mem[src1] + src2.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25481
// Packed half-precision min/max with a memory operand, using the AVX10.2
// VMINMAXPH instruction (single instruction, no temporaries). The imm8
// "function" selects min vs. max semantics based on the ideal opcode.
// k0 is passed as the mask register, i.e. the operation is unmasked.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25495
// Packed half-precision min/max, register-register form, using the AVX10.2
// VMINMAXPH instruction. Preferred over the non-AVX10.2 fallback below
// because it needs no mask/temporary registers. k0 = unmasked operation.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
    __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25509
// Packed half-precision min/max fallback when AVX10.2 is not available.
// Expands into a multi-instruction sequence using a mask register and two
// XMM temporaries (presumably to get Java min/max semantics for NaN and
// signed-zero inputs — see MacroAssembler::vector_max_min_fp16 to confirm).
// TEMP_DEF dst: dst is written before all inputs are consumed, so it must
// not alias src1/src2.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    // Note: src2 and src1 are passed in swapped order to the helper.
    __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
25525
25526 //----------PEEPHOLE RULES-----------------------------------------------------
25527 // These must follow all instruction definitions as they use the names
25528 // defined in the instructions definitions.
25529 //
25530 // peeppredicate ( rule_predicate );
// // the rule is ignored unless this predicate evaluates to true
25532 //
25533 // peepmatch ( root_instr_name [preceding_instruction]* );
25534 //
25535 // peepprocedure ( procedure_name );
25536 // // provide a procedure name to perform the optimization, the procedure should
25537 // // reside in the architecture dependent peephole file, the method has the
25538 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
25539 // // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, functions that, when invoked, create the
// // new nodes defined in peepreplace, and the rule numbers of the nodes
// // appearing in the corresponding peepmatch; the procedure returns true if
// // successful, else it returns false
25544 //
25545 // peepconstraint %{
25546 // (instruction_number.operand_name relational_op instruction_number.operand_name
25547 // [, ...] );
25548 // // instruction numbers are zero-based using left to right order in peepmatch
25549 //
25550 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25551 // // provide an instruction_number.operand_name for each operand that appears
25552 // // in the replacement instruction's match rule
25553 //
25554 // ---------VM FLAGS---------------------------------------------------------
25555 //
25556 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25557 //
25558 // Each peephole rule is given an identifying number starting with zero and
25559 // increasing by one in the order seen by the parser. An individual peephole
25560 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25561 // on the command-line.
25562 //
25563 // ---------CURRENT LIMITATIONS----------------------------------------------
25564 //
25565 // Only transformations inside a basic block (do we need more for peephole)
25566 //
25567 // ---------EXAMPLE----------------------------------------------------------
25568 //
25569 // // pertinent parts of existing instructions in architecture description
25570 // instruct movI(rRegI dst, rRegI src)
25571 // %{
25572 // match(Set dst (CopyI src));
25573 // %}
25574 //
25575 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25576 // %{
25577 // match(Set dst (AddI dst src));
25578 // effect(KILL cr);
25579 // %}
25580 //
25581 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25582 // %{
25583 // match(Set dst (AddI dst src));
25584 // %}
25585 //
25586 // 1. Simple replacement
25587 // - Only match adjacent instructions in same basic block
25588 // - Only equality constraints
25589 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25590 // - Only one replacement instruction
25591 //
25592 // // Change (inc mov) to lea
25593 // peephole %{
25594 // // lea should only be emitted when beneficial
25595 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25596 // // increment preceded by register-register move
25597 // peepmatch ( incI_rReg movI );
25598 // // require that the destination register of the increment
25599 // // match the destination register of the move
25600 // peepconstraint ( 0.dst == 1.dst );
25601 // // construct a replacement instruction that sets
25602 // // the destination to ( move's source register + one )
25603 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25604 // %}
25605 //
25606 // 2. Procedural replacement
// - More flexible in finding relevant nodes
25608 // - More flexible constraints
25609 // - More flexible transformations
25610 // - May utilise architecture-dependent API more effectively
25611 // - Currently only one replacement instruction due to adlc parsing capabilities
25612 //
25613 // // Change (inc mov) to lea
25614 // peephole %{
25615 // // lea should only be emitted when beneficial
25616 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25617 // // the rule numbers of these nodes inside are passed into the function below
25618 // peepmatch ( incI_rReg movI );
25619 // // the method that takes the responsibility of transformation
25620 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates this
25622 // // node is passed into the function above
25623 // peepreplace ( leaI_rReg_immI() );
25624 // %}
25625
// These instructions are not matched by the matcher but are used by the peephole rules below
// 32-bit add emitted as lea, created only by the peephole pass
// (predicate(false) keeps the matcher from selecting it directly).
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    // rbp and r13 cannot be encoded as a base register without a
    // displacement (their base encoding means "disp32, no base" in ModRM),
    // so put one of the other registers in the base position.
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25645
// 32-bit add of an immediate emitted as lea; peephole-only
// (predicate(false) keeps the matcher from selecting it directly).
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25656
// 32-bit shift-left by 1-3 emitted as lea with a scaled index; peephole-only.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    // x << 1 is emitted as [src + src*1]: using a base register avoids the
    // 4-byte zero displacement that a base-less scaled-index form requires.
    // Not applicable when src is rbp/r13, which cannot be a base without a
    // displacement.
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25673
// 64-bit add emitted as lea, created only by the peephole pass
// (predicate(false) keeps the matcher from selecting it directly).
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    // rbp and r13 cannot be encoded as a base register without a
    // displacement, so put one of the other registers in the base position.
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25692
// 64-bit add of a 32-bit immediate emitted as lea; peephole-only
// (predicate(false) keeps the matcher from selecting it directly).
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
25703
// 64-bit shift-left by 1-3 emitted as lea with a scaled index; peephole-only.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    // x << 1 is emitted as [src + src*1]: using a base register avoids the
    // 4-byte zero displacement that a base-less scaled-index form requires.
    // Not applicable when src is rbp/r13, which cannot be a base without a
    // displacement.
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
25720
25721 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25722 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25723 // processors with at least partial ALU support for lea
25724 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25725 // beneficial for processors with full ALU support
25726 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25727
// Int variants: coalesce mov + {add, inc, dec, sal} into a single lea.

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

// inc/dec produce a 3-operand lea (base + index/displacement), which only
// pays off on processors with full ALU support for lea (or Cascade Lake).
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}
25769
// Long variants: coalesce mov + {add, inc, dec, sal} into a single lea.

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

// inc/dec produce a 3-operand lea (base + index/displacement), which only
// pays off on processors with full ALU support for lea (or Cascade Lake).
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}
25811
// Remove lea instructions that the procedure determines to be redundant,
// for the narrow-oop/compressed-pointer lea variants.

peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}

peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
25829
// These peephole rules match instructions which set flags and are followed by a testI/L_reg
// The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction
25832
// int variant
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}

// long variant
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}
25846
25847
25848 //----------SMARTSPILL RULES---------------------------------------------------
25849 // These must follow all instruction definitions as they use the names
25850 // defined in the instructions definitions.