//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
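//
// For example, the definition
//   reg_def RAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// (see below) declares RAX as save-on-call under both the VM and the C
// calling conventions, spilled and reloaded as an int (Op_RegI), with
// hardware encoding 0, bound to the VMReg returned by rax->as_VMReg().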

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code,
// then SOE was turned off in Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned on as SOE registers.

reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
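//
// For example, in chunk0 below R10, R11, R8 and R9 (pure scratch registers)
// are given the highest priority; registers with fixed instruction roles
// (RCX for shift counts, RAX and RDX for multiply/divide) come later; and
// RSP, which participates in every calling sequence, is chosen last.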

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8, R8_H,
                   R9, R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers.  512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 intrinsics,
// array copy stubs and superword operations (see the UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX when UseAVX is enabled).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX when UseAVX is enabled).
//
// Linux ABI:   no registers are preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM15 are preserved across function calls;
//              XMM0-XMM3 might hold parameters.
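//
// Illustrative layout of one 512-bit register under the labeling above:
//   XMM0       - word a:    low 32 bits, holds a Float
//   XMM0-XMM0b - words a-b: low 64 bits, hold a Double
//   XMM0-XMM0d - words a-d: the 128-bit XMM view
//   XMM0-XMM0h - words a-h: the 256-bit YMM view
//   XMM0-XMM0p - words a-p: the full 512-bit ZMM register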

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());

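// Note: K0 is not defined above because in EVEX encodings a mask selector
// of 0 means "no masking", so K0 cannot be used as a general predicate
// register and is therefore not exposed to the register allocator.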

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8, R8_H,
                  R9, R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs
// (RSP and the TLS register R15 are excluded).
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}
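
// The _*_mask values returned by the %{ ... %} bodies here are not fixed
// register lists but RegMasks computed at VM startup (in current JDK sources
// this happens in reg_mask_init(), outside this section), so the exact
// contents of these classes can depend on runtime flags such as UseAPX,
// which controls whether the extended GPRs R16-R31 are allocatable.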
886
887 // Class for all pointer registers (excluding RSP)
888 reg_class ptr_reg %{
889 return _PTR_REG_mask;
890 %}
891
892 // Class for all pointer registers (excluding RSP and RBP)
893 reg_class ptr_reg_no_rbp %{
894 return _PTR_REG_NO_RBP_mask;
895 %}
896
897 // Class for all pointer registers (excluding RAX and RSP)
898 reg_class ptr_no_rax_reg %{
899 return _PTR_NO_RAX_REG_mask;
900 %}
901
902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
903 reg_class ptr_no_rax_rbx_reg %{
904 return _PTR_NO_RAX_RBX_REG_mask;
905 %}
906
907 // Class for all long registers (excluding RSP)
908 reg_class long_reg %{
909 return _LONG_REG_mask;
910 %}
911
912 // Class for all long registers (excluding RAX, RDX and RSP)
913 reg_class long_no_rax_rdx_reg %{
914 return _LONG_NO_RAX_RDX_REG_mask;
915 %}
916
917 // Class for all long registers (excluding RCX and RSP)
918 reg_class long_no_rcx_reg %{
919 return _LONG_NO_RCX_REG_mask;
920 %}
921
922 // Class for all long registers (excluding RBP and R13)
923 reg_class long_no_rbp_r13_reg %{
924 return _LONG_NO_RBP_R13_REG_mask;
925 %}
926
927 // Class for all int registers (excluding RSP)
928 reg_class int_reg %{
929 return _INT_REG_mask;
930 %}
931
932 // Class for all int registers (excluding RAX, RDX, and RSP)
933 reg_class int_no_rax_rdx_reg %{
934 return _INT_NO_RAX_RDX_REG_mask;
935 %}
936
937 // Class for all int registers (excluding RCX and RSP)
938 reg_class int_no_rcx_reg %{
939 return _INT_NO_RCX_REG_mask;
940 %}
941
942 // Class for all int registers (excluding RBP and R13)
943 reg_class int_no_rbp_r13_reg %{
944 return _INT_NO_RBP_R13_REG_mask;
945 %}
946
947 // Singleton class for RAX pointer register
948 reg_class ptr_rax_reg(RAX, RAX_H);
949
950 // Singleton class for RBX pointer register
951 reg_class ptr_rbx_reg(RBX, RBX_H);
952
953 // Singleton class for RSI pointer register
954 reg_class ptr_rsi_reg(RSI, RSI_H);
955
956 // Singleton class for RBP pointer register
957 reg_class ptr_rbp_reg(RBP, RBP_H);
958
959 // Singleton class for RDI pointer register
960 reg_class ptr_rdi_reg(RDI, RDI_H);
961
962 // Singleton class for stack pointer
963 reg_class ptr_rsp_reg(RSP, RSP_H);
964
965 // Singleton class for TLS pointer
966 reg_class ptr_r15_reg(R15, R15_H);
967
968 // Singleton class for RAX long register
969 reg_class long_rax_reg(RAX, RAX_H);
970
971 // Singleton class for RCX long register
972 reg_class long_rcx_reg(RCX, RCX_H);
973
974 // Singleton class for RDX long register
975 reg_class long_rdx_reg(RDX, RDX_H);
976
977 // Singleton class for R11 long register
978 reg_class long_r11_reg(R11, R11_H);
979
980 // Singleton class for RAX int register
981 reg_class int_rax_reg(RAX);
982
983 // Singleton class for RBX int register
984 reg_class int_rbx_reg(RBX);
985
986 // Singleton class for RCX int register
987 reg_class int_rcx_reg(RCX);
988
989 // Singleton class for RDX int register
990 reg_class int_rdx_reg(RDX);
991
992 // Singleton class for RDI int register
993 reg_class int_rdi_reg(RDI);
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
998 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
999 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1000 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1001 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1002 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1003 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1004 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1005 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1006 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1007 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1008 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1009 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1010 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1011 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1012 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1013 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1014 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1015 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1016 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1017 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1018 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1019 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1020 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1021 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1022 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1023 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1024 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1025 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1026 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1027 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1028 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1029 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
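// A note on reg_class_dynamic: it selects the first (EVEX) class when the
// predicate holds and falls back to the legacy class otherwise. The _vl
// variant additionally requires AVX-512VL, which is what allows XMM16-31
// to be used for scalar and sub-512-bit operations.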
1115
// Class for pre-EVEX double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
// Class for EVEX double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre-EVEX 32-bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
// Class for EVEX 32-bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre-EVEX 64-bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for EVEX 64-bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre-EVEX 128-bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for EVEX 128-bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre-EVEX 256-bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for EVEX 256-bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for EVEX 512-bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512-bit vector registers (legacy-encodable XMM0-XMM15)
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
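// r16-r31 are the APX extended GPRs (EGPRs); they are stripped from the
// masks below unless UseAPX is enabled.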
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
return generate_vzeroupper(Compile::current()) ? 3 : 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
// All floating point compare instructions set CF=1 for the unordered case
// (at least one input is NaN), so the 'below' branch is also taken when
// the inputs are unordered.
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
1711 // Math.min() # Math.max()
1712 // --------------------------
1713 // ucomis[s/d] #
1714 // ja -> b # a
1715 // jp -> NaN # NaN
1716 // jb -> a # b
1717 // je #
1718 // |-jz -> a | b # a & b
1719 // | -> a #
1720 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1721 XMMRegister a, XMMRegister b,
1722 XMMRegister xmmt, Register rt,
1723 bool min, bool single) {
1724
1725 Label nan, zero, below, above, done;
1726
1727 if (single)
1728 __ ucomiss(a, b);
1729 else
1730 __ ucomisd(a, b);
1731
1732 if (dst->encoding() != (min ? b : a)->encoding())
1733 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1734 else
1735 __ jccb(Assembler::above, done);
1736
1737 __ jccb(Assembler::parity, nan); // PF=1
1738 __ jccb(Assembler::below, below); // CF=1
1739
1740 // equal
1741 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1742 if (single) {
1743 __ ucomiss(a, xmmt);
1744 __ jccb(Assembler::equal, zero);
1745
1746 __ movflt(dst, a);
1747 __ jmp(done);
1748 }
1749 else {
1750 __ ucomisd(a, xmmt);
1751 __ jccb(Assembler::equal, zero);
1752
1753 __ movdbl(dst, a);
1754 __ jmp(done);
1755 }
1756
1757 __ bind(zero);
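// Signed zeros: when a == b == +/-0.0, min must return -0.0 if either
// input is -0.0 (bitwise OR of the operands), while max must return +0.0
// unless both inputs are -0.0 (bitwise AND), hence the vpor/vpand below.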
1758 if (min)
1759 __ vpor(dst, a, b, Assembler::AVX_128bit);
1760 else
1761 __ vpand(dst, a, b, Assembler::AVX_128bit);
1762
1763 __ jmp(done);
1764
1765 __ bind(above);
1766 if (single)
1767 __ movflt(dst, min ? b : a);
1768 else
1769 __ movdbl(dst, min ? b : a);
1770
1771 __ jmp(done);
1772
1773 __ bind(nan);
1774 if (single) {
1775 __ movl(rt, 0x7fc00000); // Float.NaN
1776 __ movdl(dst, rt);
1777 }
1778 else {
1779 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1780 __ movdq(dst, rt);
1781 }
1782 __ jmp(done);
1783
1784 __ bind(below);
1785 if (single)
1786 __ movflt(dst, min ? a : b);
1787 else
1788 __ movdbl(dst, min ? a : b);
1789
1790 __ bind(done);
1791 }
1792
1793 //=============================================================================
1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1795
1796 int ConstantTable::calculate_table_base_offset() const {
1797 return 0; // absolute addressing, no offset
1798 }
1799
1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1802 ShouldNotReachHere();
1803 }
1804
1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1806 // Empty encoding
1807 }
1808
1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1810 return 0;
1811 }
1812
1813 #ifndef PRODUCT
1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1815 st->print("# MachConstantBaseNode (empty encoding)");
1816 }
1817 #endif
1818
1819
1820 //=============================================================================
1821 #ifndef PRODUCT
1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1823 Compile* C = ra_->C;
1824
1825 int framesize = C->output()->frame_size_in_bytes();
1826 int bangsize = C->output()->bang_size_in_bytes();
1827 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1828 // Remove wordSize for return addr which is already pushed.
1829 framesize -= wordSize;
1830
1831 if (C->output()->need_stack_bang(bangsize)) {
1832 framesize -= wordSize;
1833 st->print("# stack bang (%d bytes)", bangsize);
1834 st->print("\n\t");
1835 st->print("pushq rbp\t# Save rbp");
1836 if (PreserveFramePointer) {
1837 st->print("\n\t");
1838 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1839 }
1840 if (framesize) {
1841 st->print("\n\t");
1842 st->print("subq rsp, #%d\t# Create frame",framesize);
1843 }
1844 } else {
1845 st->print("subq rsp, #%d\t# Create frame",framesize);
1846 st->print("\n\t");
1847 framesize -= wordSize;
1848 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1849 if (PreserveFramePointer) {
1850 st->print("\n\t");
1851 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1852 if (framesize > 0) {
1853 st->print("\n\t");
1854 st->print("addq rbp, #%d", framesize);
1855 }
1856 }
1857 }
1858
1859 if (VerifyStackAtCalls) {
1860 st->print("\n\t");
1861 framesize -= wordSize;
1862 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1863 #ifdef ASSERT
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1882 Compile* C = ra_->C;
1883
1884 int framesize = C->output()->frame_size_in_bytes();
1885 int bangsize = C->output()->bang_size_in_bytes();
1886
1887 if (C->clinit_barrier_on_entry()) {
1888 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1889 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1890
1891 Label L_skip_barrier;
1892 Register klass = rscratch1;
1893
1894 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1895 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1896
1897 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1898
1899 __ bind(L_skip_barrier);
1900 }
1901
1902 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1903
1904 C->output()->set_frame_complete(__ offset());
1905
1906 if (C->has_mach_constant_base_node()) {
// NOTE: We set the table base offset here because uses of the constant
// table may be emitted before MachConstantBaseNode.
1909 ConstantTable& constant_table = C->output()->constant_table();
1910 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1911 }
1912 }
1913
1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1915 {
1916 return MachNode::size(ra_); // too many variables; just compute it
1917 // the hard way
1918 }
1919
1920 int MachPrologNode::reloc() const
1921 {
1922 return 0; // a large enough number
1923 }
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1939 framesize -= 2*wordSize;
1940
1941 if (framesize) {
1942 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1943 st->print("\t");
1944 }
1945
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1957 {
1958 Compile* C = ra_->C;
1959
1960 if (generate_vzeroupper(C)) {
1961 // Clear upper bits of YMM registers when current compiled code uses
1962 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1963 __ vzeroupper();
1964 }
1965
1966 int framesize = C->output()->frame_size_in_bytes();
1967 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1968 // Remove word for return adr already pushed
1969 // and RBP
1970 framesize -= 2*wordSize;
1971
1972 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1973
1974 if (framesize) {
1975 __ addq(rsp, framesize);
1976 }
1977
1978 __ popq(rbp);
1979
1980 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1981 __ reserved_stack_check();
1982 }
1983
1984 if (do_polling() && C->is_method_compilation()) {
1985 Label dummy_label;
1986 Label* code_stub = &dummy_label;
1987 if (!C->output()->in_scratch_emit_size()) {
1988 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1989 C->output()->add_stub(stub);
1990 code_stub = &stub->entry();
1991 }
1992 __ relocate(relocInfo::poll_return_type);
1993 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1994 }
1995 }
1996
1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1998 {
1999 return MachNode::size(ra_); // too many variables; just compute it
2000 // the hard way
2001 }
2002
2003 int MachEpilogNode::reloc() const
2004 {
2005 return 2; // a large enough number
2006 }
2007
2008 const Pipeline* MachEpilogNode::pipeline() const
2009 {
2010 return MachNode::pipeline_class();
2011 }
2012
2013 //=============================================================================
2014
2015 enum RC {
2016 rc_bad,
2017 rc_int,
2018 rc_kreg,
2019 rc_float,
2020 rc_stack
2021 };
2022
2023 static enum RC rc_class(OptoReg::Name reg)
2024 {
if (!OptoReg::is_valid(reg)) return rc_bad;
2026
2027 if (OptoReg::is_stack(reg)) return rc_stack;
2028
2029 VMReg r = OptoReg::as_VMReg(reg);
2030
2031 if (r->is_Register()) return rc_int;
2032
2033 if (r->is_KRegister()) return rc_kreg;
2034
2035 assert(r->is_XMMRegister(), "must be");
2036 return rc_float;
2037 }
2038
2039 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2040 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2041 int src_hi, int dst_hi, uint ireg, outputStream* st);
2042
2043 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2044 int stack_offset, int reg, uint ireg, outputStream* st);
2045
2046 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2047 int dst_offset, uint ireg, outputStream* st) {
2048 if (masm) {
2049 switch (ireg) {
2050 case Op_VecS:
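// Preserve rax in the slot just below rsp and borrow it as a scratch
// register for the 32-bit copy; the moves leave flags untouched.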
2051 __ movq(Address(rsp, -8), rax);
2052 __ movl(rax, Address(rsp, src_offset));
2053 __ movl(Address(rsp, dst_offset), rax);
2054 __ movq(rax, Address(rsp, -8));
2055 break;
2056 case Op_VecD:
2057 __ pushq(Address(rsp, src_offset));
2058 __ popq (Address(rsp, dst_offset));
2059 break;
2060 case Op_VecX:
2061 __ pushq(Address(rsp, src_offset));
2062 __ popq (Address(rsp, dst_offset));
2063 __ pushq(Address(rsp, src_offset+8));
2064 __ popq (Address(rsp, dst_offset+8));
2065 break;
2066 case Op_VecY:
2067 __ vmovdqu(Address(rsp, -32), xmm0);
2068 __ vmovdqu(xmm0, Address(rsp, src_offset));
2069 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2070 __ vmovdqu(xmm0, Address(rsp, -32));
2071 break;
2072 case Op_VecZ:
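// The vector_len argument 2 is Assembler::AVX_512bit for the evmovdquq
// copies below.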
2073 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2074 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2075 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2076 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2077 break;
2078 default:
2079 ShouldNotReachHere();
2080 }
2081 #ifndef PRODUCT
2082 } else {
2083 switch (ireg) {
2084 case Op_VecS:
2085 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2086 "movl rax, [rsp + #%d]\n\t"
2087 "movl [rsp + #%d], rax\n\t"
2088 "movq rax, [rsp - #8]",
2089 src_offset, dst_offset);
2090 break;
2091 case Op_VecD:
2092 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2093 "popq [rsp + #%d]",
2094 src_offset, dst_offset);
2095 break;
2096 case Op_VecX:
2097 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2098 "popq [rsp + #%d]\n\t"
2099 "pushq [rsp + #%d]\n\t"
2100 "popq [rsp + #%d]",
2101 src_offset, dst_offset, src_offset+8, dst_offset+8);
2102 break;
2103 case Op_VecY:
2104 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2105 "vmovdqu xmm0, [rsp + #%d]\n\t"
2106 "vmovdqu [rsp + #%d], xmm0\n\t"
2107 "vmovdqu xmm0, [rsp - #32]",
2108 src_offset, dst_offset);
2109 break;
2110 case Op_VecZ:
2111 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2112 "vmovdqu xmm0, [rsp + #%d]\n\t"
2113 "vmovdqu [rsp + #%d], xmm0\n\t"
2114 "vmovdqu xmm0, [rsp - #64]",
2115 src_offset, dst_offset);
2116 break;
2117 default:
2118 ShouldNotReachHere();
2119 }
2120 #endif
2121 }
2122 }
2123
2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2125 PhaseRegAlloc* ra_,
2126 bool do_size,
2127 outputStream* st) const {
2128 assert(masm != nullptr || st != nullptr, "sanity");
2129 // Get registers to move
2130 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2131 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2132 OptoReg::Name dst_second = ra_->get_reg_second(this);
2133 OptoReg::Name dst_first = ra_->get_reg_first(this);
2134
2135 enum RC src_second_rc = rc_class(src_second);
2136 enum RC src_first_rc = rc_class(src_first);
2137 enum RC dst_second_rc = rc_class(dst_second);
2138 enum RC dst_first_rc = rc_class(dst_first);
2139
2140 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2141 "must move at least 1 register" );
2142
2143 if (src_first == dst_first && src_second == dst_second) {
2144 // Self copy, no move
2145 return 0;
2146 }
2147 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2148 uint ireg = ideal_reg();
2149 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2150 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2152 // mem -> mem
2153 int src_offset = ra_->reg2offset(src_first);
2154 int dst_offset = ra_->reg2offset(dst_first);
2155 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2156 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2157 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2158 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2159 int stack_offset = ra_->reg2offset(dst_first);
2160 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2161 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2162 int stack_offset = ra_->reg2offset(src_first);
2163 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2164 } else {
2165 ShouldNotReachHere();
2166 }
2167 return 0;
2168 }
2169 if (src_first_rc == rc_stack) {
2170 // mem ->
2171 if (dst_first_rc == rc_stack) {
2172 // mem -> mem
2173 assert(src_second != dst_first, "overlap");
2174 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2175 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2176 // 64-bit
2177 int src_offset = ra_->reg2offset(src_first);
2178 int dst_offset = ra_->reg2offset(dst_first);
2179 if (masm) {
2180 __ pushq(Address(rsp, src_offset));
2181 __ popq (Address(rsp, dst_offset));
2182 #ifndef PRODUCT
2183 } else {
2184 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2185 "popq [rsp + #%d]",
2186 src_offset, dst_offset);
2187 #endif
2188 }
2189 } else {
2190 // 32-bit
2191 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2192 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2193 // No pushl/popl, so:
2194 int src_offset = ra_->reg2offset(src_first);
2195 int dst_offset = ra_->reg2offset(dst_first);
2196 if (masm) {
2197 __ movq(Address(rsp, -8), rax);
2198 __ movl(rax, Address(rsp, src_offset));
2199 __ movl(Address(rsp, dst_offset), rax);
2200 __ movq(rax, Address(rsp, -8));
2201 #ifndef PRODUCT
2202 } else {
2203 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2204 "movl rax, [rsp + #%d]\n\t"
2205 "movl [rsp + #%d], rax\n\t"
2206 "movq rax, [rsp - #8]",
2207 src_offset, dst_offset);
2208 #endif
2209 }
2210 }
2211 return 0;
2212 } else if (dst_first_rc == rc_int) {
2213 // mem -> gpr
2214 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2215 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2216 // 64-bit
2217 int offset = ra_->reg2offset(src_first);
2218 if (masm) {
2219 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2220 #ifndef PRODUCT
2221 } else {
2222 st->print("movq %s, [rsp + #%d]\t# spill",
2223 Matcher::regName[dst_first],
2224 offset);
2225 #endif
2226 }
2227 } else {
2228 // 32-bit
2229 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2230 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2231 int offset = ra_->reg2offset(src_first);
2232 if (masm) {
2233 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2234 #ifndef PRODUCT
2235 } else {
2236 st->print("movl %s, [rsp + #%d]\t# spill",
2237 Matcher::regName[dst_first],
2238 offset);
2239 #endif
2240 }
2241 }
2242 return 0;
2243 } else if (dst_first_rc == rc_float) {
2244 // mem-> xmm
2245 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2246 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2247 // 64-bit
2248 int offset = ra_->reg2offset(src_first);
2249 if (masm) {
2250 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2251 #ifndef PRODUCT
2252 } else {
2253 st->print("%s %s, [rsp + #%d]\t# spill",
2254 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2255 Matcher::regName[dst_first],
2256 offset);
2257 #endif
2258 }
2259 } else {
2260 // 32-bit
2261 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2262 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2263 int offset = ra_->reg2offset(src_first);
2264 if (masm) {
2265 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2266 #ifndef PRODUCT
2267 } else {
2268 st->print("movss %s, [rsp + #%d]\t# spill",
2269 Matcher::regName[dst_first],
2270 offset);
2271 #endif
2272 }
2273 }
2274 return 0;
2275 } else if (dst_first_rc == rc_kreg) {
2276 // mem -> kreg
2277 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2278 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2279 // 64-bit
2280 int offset = ra_->reg2offset(src_first);
2281 if (masm) {
2282 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2283 #ifndef PRODUCT
2284 } else {
2285 st->print("kmovq %s, [rsp + #%d]\t# spill",
2286 Matcher::regName[dst_first],
2287 offset);
2288 #endif
2289 }
2290 }
2291 return 0;
2292 }
2293 } else if (src_first_rc == rc_int) {
2294 // gpr ->
2295 if (dst_first_rc == rc_stack) {
2296 // gpr -> mem
2297 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2298 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2299 // 64-bit
2300 int offset = ra_->reg2offset(dst_first);
2301 if (masm) {
2302 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2303 #ifndef PRODUCT
2304 } else {
2305 st->print("movq [rsp + #%d], %s\t# spill",
2306 offset,
2307 Matcher::regName[src_first]);
2308 #endif
2309 }
2310 } else {
2311 // 32-bit
2312 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2313 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2314 int offset = ra_->reg2offset(dst_first);
2315 if (masm) {
2316 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2317 #ifndef PRODUCT
2318 } else {
2319 st->print("movl [rsp + #%d], %s\t# spill",
2320 offset,
2321 Matcher::regName[src_first]);
2322 #endif
2323 }
2324 }
2325 return 0;
2326 } else if (dst_first_rc == rc_int) {
2327 // gpr -> gpr
2328 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2329 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2330 // 64-bit
2331 if (masm) {
2332 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2333 as_Register(Matcher::_regEncode[src_first]));
2334 #ifndef PRODUCT
2335 } else {
2336 st->print("movq %s, %s\t# spill",
2337 Matcher::regName[dst_first],
2338 Matcher::regName[src_first]);
2339 #endif
2340 }
2341 return 0;
2342 } else {
2343 // 32-bit
2344 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2345 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2346 if (masm) {
2347 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2348 as_Register(Matcher::_regEncode[src_first]));
2349 #ifndef PRODUCT
2350 } else {
2351 st->print("movl %s, %s\t# spill",
2352 Matcher::regName[dst_first],
2353 Matcher::regName[src_first]);
2354 #endif
2355 }
2356 return 0;
2357 }
2358 } else if (dst_first_rc == rc_float) {
2359 // gpr -> xmm
2360 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2361 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2362 // 64-bit
2363 if (masm) {
2364 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2365 #ifndef PRODUCT
2366 } else {
2367 st->print("movdq %s, %s\t# spill",
2368 Matcher::regName[dst_first],
2369 Matcher::regName[src_first]);
2370 #endif
2371 }
2372 } else {
2373 // 32-bit
2374 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2375 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2376 if (masm) {
2377 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2378 #ifndef PRODUCT
2379 } else {
2380 st->print("movdl %s, %s\t# spill",
2381 Matcher::regName[dst_first],
2382 Matcher::regName[src_first]);
2383 #endif
2384 }
2385 }
2386 return 0;
2387 } else if (dst_first_rc == rc_kreg) {
2388 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2389 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2390 // 64-bit
2391 if (masm) {
2392 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("kmovq %s, %s\t# spill",
2396 Matcher::regName[dst_first],
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 }
2401 Unimplemented();
2402 return 0;
2403 }
2404 } else if (src_first_rc == rc_float) {
2405 // xmm ->
2406 if (dst_first_rc == rc_stack) {
2407 // xmm -> mem
2408 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2409 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2410 // 64-bit
2411 int offset = ra_->reg2offset(dst_first);
2412 if (masm) {
2413 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2414 #ifndef PRODUCT
2415 } else {
2416 st->print("movsd [rsp + #%d], %s\t# spill",
2417 offset,
2418 Matcher::regName[src_first]);
2419 #endif
2420 }
2421 } else {
2422 // 32-bit
2423 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2424 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2425 int offset = ra_->reg2offset(dst_first);
2426 if (masm) {
2427 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2428 #ifndef PRODUCT
2429 } else {
2430 st->print("movss [rsp + #%d], %s\t# spill",
2431 offset,
2432 Matcher::regName[src_first]);
2433 #endif
2434 }
2435 }
2436 return 0;
2437 } else if (dst_first_rc == rc_int) {
2438 // xmm -> gpr
2439 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2440 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2441 // 64-bit
2442 if (masm) {
2443 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2444 #ifndef PRODUCT
2445 } else {
2446 st->print("movdq %s, %s\t# spill",
2447 Matcher::regName[dst_first],
2448 Matcher::regName[src_first]);
2449 #endif
2450 }
2451 } else {
2452 // 32-bit
2453 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2454 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2455 if (masm) {
2456 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2457 #ifndef PRODUCT
2458 } else {
2459 st->print("movdl %s, %s\t# spill",
2460 Matcher::regName[dst_first],
2461 Matcher::regName[src_first]);
2462 #endif
2463 }
2464 }
2465 return 0;
2466 } else if (dst_first_rc == rc_float) {
2467 // xmm -> xmm
2468 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2469 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2470 // 64-bit
2471 if (masm) {
2472 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2473 #ifndef PRODUCT
2474 } else {
2475 st->print("%s %s, %s\t# spill",
2476 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2477 Matcher::regName[dst_first],
2478 Matcher::regName[src_first]);
2479 #endif
2480 }
2481 } else {
2482 // 32-bit
2483 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2484 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2485 if (masm) {
2486 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2487 #ifndef PRODUCT
2488 } else {
2489 st->print("%s %s, %s\t# spill",
2490 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2491 Matcher::regName[dst_first],
2492 Matcher::regName[src_first]);
2493 #endif
2494 }
2495 }
2496 return 0;
2497 } else if (dst_first_rc == rc_kreg) {
2498 assert(false, "Illegal spilling");
2499 return 0;
2500 }
2501 } else if (src_first_rc == rc_kreg) {
2502 if (dst_first_rc == rc_stack) {
// kreg -> mem
2504 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2505 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2506 // 64-bit
2507 int offset = ra_->reg2offset(dst_first);
2508 if (masm) {
2509 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2510 #ifndef PRODUCT
2511 } else {
2512 st->print("kmovq [rsp + #%d] , %s\t# spill",
2513 offset,
2514 Matcher::regName[src_first]);
2515 #endif
2516 }
2517 }
2518 return 0;
2519 } else if (dst_first_rc == rc_int) {
2520 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2521 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2522 // 64-bit
2523 if (masm) {
2524 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2525 #ifndef PRODUCT
2526 } else {
2527 st->print("kmovq %s, %s\t# spill",
2528 Matcher::regName[dst_first],
2529 Matcher::regName[src_first]);
2530 #endif
2531 }
2532 }
2533 Unimplemented();
2534 return 0;
2535 } else if (dst_first_rc == rc_kreg) {
2536 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2537 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2538 // 64-bit
2539 if (masm) {
2540 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2541 #ifndef PRODUCT
2542 } else {
2543 st->print("kmovq %s, %s\t# spill",
2544 Matcher::regName[dst_first],
2545 Matcher::regName[src_first]);
2546 #endif
2547 }
2548 }
2549 return 0;
2550 } else if (dst_first_rc == rc_float) {
2551 assert(false, "Illegal spill");
2552 return 0;
2553 }
2554 }
2555
2556 assert(0," foo ");
2557 Unimplemented();
2558 return 0;
2559 }
2560
2561 #ifndef PRODUCT
2562 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2563 implementation(nullptr, ra_, false, st);
2564 }
2565 #endif
2566
2567 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2568 implementation(masm, ra_, false, nullptr);
2569 }
2570
2571 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2572 return MachNode::size(ra_);
2573 }
2574
2575 //=============================================================================
2576 #ifndef PRODUCT
2577 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2578 {
2579 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2580 int reg = ra_->get_reg_first(this);
2581 st->print("leaq %s, [rsp + #%d]\t# box lock",
2582 Matcher::regName[reg], offset);
2583 }
2584 #endif
2585
2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2587 {
2588 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2589 int reg = ra_->get_encode(this);
2590
2591 __ lea(as_Register(reg), Address(rsp, offset));
2592 }
2593
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
2612 st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2620 {
2621 __ ic_check(InteriorEntryAlignment);
2622 }
2623
2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2625 {
2626 return MachNode::size(ra_); // too many variables; just compute it
2627 // the hard way
2628 }
2629
2630
2631 //=============================================================================
2632
2633 bool Matcher::supports_vector_calling_convention(void) {
2634 return EnableVectorSupport;
2635 }
2636
2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2638 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2639 }
2640
2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2642 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2643 }
2644
2645 #ifdef ASSERT
2646 static bool is_ndd_demotable(const MachNode* mdef) {
2647 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2648 }
2649 #endif
2650
2651 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2652 int oper_index) {
2653 if (mdef == nullptr) {
2654 return false;
2655 }
2656
2657 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2658 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2659 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2660 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2661 return false;
2662 }
2663
  // A complex memory operand covers multiple incoming edges needed for
  // address computation. Biasing the def towards any address component will
  // not result in NDD demotion by the assembler.
2667 if (mdef->operand_num_edges(oper_index) != 1) {
2668 return false;
2669 }
2670
  // A demotion candidate must be register-mask compatible with the definition.
2672 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2673 if (!oper_mask.overlap(mdef->out_RegMask())) {
2674 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2675 return false;
2676 }
2677
2678 switch (oper_index) {
    // The first operand of a MachNode corresponding to an Intel APX NDD
    // selection pattern can share its assigned register with the definition
    // operand if their live ranges do not overlap. In such a scenario we can
    // demote the instruction to a legacy map0/map1 encoding by replacing its
    // 4-byte extended EVEX prefix with a shorter REX/REX2 prefix. Demotion
    // candidates are decorated with a special flag by the instruction selector.
2685 case 1:
2686 return is_ndd_demotable_opr1(mdef);
2687
    // The definition operand of a commutative operation can be biased
    // towards the second operand.
2690 case 2:
2691 return is_ndd_demotable_opr2(mdef);
2692
    // The current scheme selects at most two biasing candidates.
2694 default:
2695 assert(false, "unhandled operand index: %s", mdef->Name());
2696 break;
2697 }
2698
2699 return false;
2700 }
2701
2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2703 assert(EnableVectorSupport, "sanity");
2704 int lo = XMM0_num;
2705 int hi = XMM0b_num;
2706 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2707 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2708 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2709 return OptoRegPair(hi, lo);
2710 }
2711
2712 // Is this branch offset short enough that a short branch can be used?
2713 //
2714 // NOTE: If the platform does not provide any short branch variants, then
2715 // this method should return false for offset 0.
2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
2720 offset -= br_size;
2721
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
2724 if (rule == jmpConUCF2_rule)
2725 return (-126 <= offset && offset <= 125);
2726 return (-128 <= offset && offset <= 127);
2727 }
2728
2729 #ifdef ASSERT
2730 // Return whether or not this register is ever used as an argument.
2731 bool Matcher::can_be_java_arg(int reg)
2732 {
2733 return
2734 reg == RDI_num || reg == RDI_H_num ||
2735 reg == RSI_num || reg == RSI_H_num ||
2736 reg == RDX_num || reg == RDX_H_num ||
2737 reg == RCX_num || reg == RCX_H_num ||
2738 reg == R8_num || reg == R8_H_num ||
2739 reg == R9_num || reg == R9_H_num ||
2740 reg == R12_num || reg == R12_H_num ||
2741 reg == XMM0_num || reg == XMM0b_num ||
2742 reg == XMM1_num || reg == XMM1b_num ||
2743 reg == XMM2_num || reg == XMM2b_num ||
2744 reg == XMM3_num || reg == XMM3b_num ||
2745 reg == XMM4_num || reg == XMM4b_num ||
2746 reg == XMM5_num || reg == XMM5b_num ||
2747 reg == XMM6_num || reg == XMM6b_num ||
2748 reg == XMM7_num || reg == XMM7b_num;
2749 }
2750 #endif
2751
2752 uint Matcher::int_pressure_limit()
2753 {
2754 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2755 }
2756
2757 uint Matcher::float_pressure_limit()
2758 {
  // After experimenting with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
2761 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2762 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2763 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2764 }
2765
2766 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64-bit mode, code that uses multiplication when the
  // divisor is a constant is faster than the hardware
  // DIV instruction (it uses MulHiL).
2770 return false;
2771 }
2772
2773 // Register for DIVI projection of divmodI
2774 const RegMask& Matcher::divI_proj_mask() {
2775 return INT_RAX_REG_mask();
2776 }
2777
2778 // Register for MODI projection of divmodI
2779 const RegMask& Matcher::modI_proj_mask() {
2780 return INT_RDX_REG_mask();
2781 }
2782
2783 // Register for DIVL projection of divmodL
2784 const RegMask& Matcher::divL_proj_mask() {
2785 return LONG_RAX_REG_mask();
2786 }
2787
2788 // Register for MODL projection of divmodL
2789 const RegMask& Matcher::modL_proj_mask() {
2790 return LONG_RDX_REG_mask();
2791 }
2792
2793 %}
2794
2795 source_hpp %{
2796 // Header information of the source block.
2797 // Method declarations/definitions which are used outside
2798 // the ad-scope can conveniently be defined here.
2799 //
2800 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} blocks freely as needed.
2802
2803 #include "runtime/vm_version.hpp"
2804
2805 class NativeJump;
2806
2807 class CallStubImpl {
2808
2809 //--------------------------------------------------------------
2810 //---< Used for optimization in Compile::shorten_branches >---
2811 //--------------------------------------------------------------
2812
2813 public:
2814 // Size of call trampoline stub.
2815 static uint size_call_trampoline() {
2816 return 0; // no call trampolines on this platform
2817 }
2818
2819 // number of relocations needed by a call trampoline stub
2820 static uint reloc_call_trampoline() {
2821 return 0; // no call trampolines on this platform
2822 }
2823 };
2824
2825 class HandlerImpl {
2826
2827 public:
2828
2829 static int emit_deopt_handler(C2_MacroAssembler* masm);
2830
2831 static uint size_deopt_handler() {
    // One 5-byte call plus one 2-byte short jmp.
2833 return 7;
2834 }
2835 };
2836
2837 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
2839 case 4: // fall-through
2840 case 8: // fall-through
2841 case 16: return Assembler::AVX_128bit;
2842 case 32: return Assembler::AVX_256bit;
2843 case 64: return Assembler::AVX_512bit;
2844
2845 default: {
2846 ShouldNotReachHere();
2847 return Assembler::AVX_NoVec;
2848 }
2849 }
2850 }
2851
2852 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2853 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2854 }
2855
2856 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2857 uint def_idx = use->operand_index(opnd);
2858 Node* def = use->in(def_idx);
2859 return vector_length_encoding(def);
2860 }
2861
2862 static inline bool is_vector_popcount_predicate(BasicType bt) {
2863 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2864 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2865 }
2866
2867 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2868 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2869 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2870 }
2871
2872 class Node::PD {
2873 public:
2874 enum NodeFlags : uint64_t {
2875 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2876 Flag_sets_carry_flag = Node::_last_flag << 2,
2877 Flag_sets_parity_flag = Node::_last_flag << 3,
2878 Flag_sets_zero_flag = Node::_last_flag << 4,
2879 Flag_sets_overflow_flag = Node::_last_flag << 5,
2880 Flag_sets_sign_flag = Node::_last_flag << 6,
2881 Flag_clears_carry_flag = Node::_last_flag << 7,
2882 Flag_clears_parity_flag = Node::_last_flag << 8,
2883 Flag_clears_zero_flag = Node::_last_flag << 9,
2884 Flag_clears_overflow_flag = Node::_last_flag << 10,
2885 Flag_clears_sign_flag = Node::_last_flag << 11,
2886 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2887 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2888 _last_flag = Flag_ndd_demotable_opr2
2889 };
2890 };
2891
2892 %} // end source_hpp
2893
2894 source %{
2895
2896 #include "opto/addnode.hpp"
2897 #include "c2_intelJccErratum_x86.hpp"
2898
2899 void PhaseOutput::pd_perform_mach_node_analysis() {
2900 if (VM_Version::has_intel_jcc_erratum()) {
2901 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2902 _buf_sizes._code += extra_padding;
2903 }
2904 }
2905
2906 int MachNode::pd_alignment_required() const {
2907 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2908 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2909 return IntelJccErratum::largest_jcc_size() + 1;
2910 } else {
2911 return 1;
2912 }
2913 }
2914
2915 int MachNode::compute_padding(int current_offset) const {
2916 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2917 Compile* C = Compile::current();
2918 PhaseOutput* output = C->output();
2919 Block* block = output->block();
2920 int index = output->index();
2921 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2922 } else {
2923 return 0;
2924 }
2925 }
2926
2927 // Emit deopt handler code.
2928 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2929
2930 // Note that the code buffer's insts_mark is always relative to insts.
2931 // That's why we must use the macroassembler to generate a handler.
2932 address base = __ start_a_stub(size_deopt_handler());
2933 if (base == nullptr) {
2934 ciEnv::current()->record_failure("CodeCache is full");
2935 return 0; // CodeBuffer::expand failed
2936 }
2937 int offset = __ offset();
2938
2939 Label start;
2940 __ bind(start);
2941
2942 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2943
2944 int entry_offset = __ offset();
2945
2946 __ jmp(start);
2947
2948 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2949 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2950 "out of bounds read in post-call NOP check");
2951 __ end_a_stub();
2952 return entry_offset;
2953 }
2954
2955 static Assembler::Width widthForType(BasicType bt) {
2956 if (bt == T_BYTE) {
2957 return Assembler::B;
2958 } else if (bt == T_SHORT) {
2959 return Assembler::W;
2960 } else if (bt == T_INT) {
2961 return Assembler::D;
2962 } else {
2963 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2964 return Assembler::Q;
2965 }
2966 }
2967
2968 //=============================================================================
2969
2970 // Float masks come from different places depending on platform.
2971 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2972 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2973 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2974 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2975 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2976 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2977 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2978 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2979 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2980 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2981 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2982 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2983 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2984 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2985 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2986 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2987 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2988 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2989 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2990
2991 //=============================================================================
2992 bool Matcher::match_rule_supported(int opcode) {
2993 if (!has_match_rule(opcode)) {
2994 return false; // no match rule present
2995 }
2996 switch (opcode) {
2997 case Op_AbsVL:
2998 case Op_StoreVectorScatter:
2999 if (UseAVX < 3) {
3000 return false;
3001 }
3002 break;
3003 case Op_PopCountI:
3004 case Op_PopCountL:
3005 if (!UsePopCountInstruction) {
3006 return false;
3007 }
3008 break;
3009 case Op_PopCountVI:
3010 if (UseAVX < 2) {
3011 return false;
3012 }
3013 break;
3014 case Op_CompressV:
3015 case Op_ExpandV:
3016 case Op_PopCountVL:
3017 if (UseAVX < 2) {
3018 return false;
3019 }
3020 break;
3021 case Op_MulVI:
3022 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3023 return false;
3024 }
3025 break;
3026 case Op_MulVL:
3027 if (UseSSE < 4) { // only with SSE4_1 or AVX
3028 return false;
3029 }
3030 break;
3031 case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
3033 return false;
3034 }
3035 break;
3036 case Op_AbsVB:
3037 case Op_AbsVS:
3038 case Op_AbsVI:
3039 case Op_AddReductionVI:
3040 case Op_AndReductionV:
3041 case Op_OrReductionV:
3042 case Op_XorReductionV:
3043 if (UseSSE < 3) { // requires at least SSSE3
3044 return false;
3045 }
3046 break;
3047 case Op_MaxHF:
3048 case Op_MinHF:
3049 if (!VM_Version::supports_avx512vlbw()) {
3050 return false;
3051 } // fallthrough
3052 case Op_AddHF:
3053 case Op_DivHF:
3054 case Op_FmaHF:
3055 case Op_MulHF:
3056 case Op_ReinterpretS2HF:
3057 case Op_ReinterpretHF2S:
3058 case Op_SubHF:
3059 case Op_SqrtHF:
3060 if (!VM_Version::supports_avx512_fp16()) {
3061 return false;
3062 }
3063 break;
3064 case Op_VectorLoadShuffle:
3065 case Op_VectorRearrange:
3066 case Op_MulReductionVI:
3067 if (UseSSE < 4) { // requires at least SSE4
3068 return false;
3069 }
3070 break;
3071 case Op_IsInfiniteF:
3072 case Op_IsInfiniteD:
3073 if (!VM_Version::supports_avx512dq()) {
3074 return false;
3075 }
3076 break;
3077 case Op_SqrtVD:
3078 case Op_SqrtVF:
3079 case Op_VectorMaskCmp:
3080 case Op_VectorCastB2X:
3081 case Op_VectorCastS2X:
3082 case Op_VectorCastI2X:
3083 case Op_VectorCastL2X:
3084 case Op_VectorCastF2X:
3085 case Op_VectorCastD2X:
3086 case Op_VectorUCastB2X:
3087 case Op_VectorUCastS2X:
3088 case Op_VectorUCastI2X:
3089 case Op_VectorMaskCast:
3090 if (UseAVX < 1) { // enabled for AVX only
3091 return false;
3092 }
3093 break;
3094 case Op_PopulateIndex:
3095 if (UseAVX < 2) {
3096 return false;
3097 }
3098 break;
3099 case Op_RoundVF:
3100 if (UseAVX < 2) { // enabled for AVX2 only
3101 return false;
3102 }
3103 break;
3104 case Op_RoundVD:
3105 if (UseAVX < 3) {
3106 return false; // enabled for AVX3 only
3107 }
3108 break;
3109 case Op_CompareAndSwapL:
3110 case Op_CompareAndSwapP:
3111 break;
    case Op_StrIndexOf:
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
3122 case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait()) {
3124 return false;
3125 }
3126 break;
3127 case Op_MulVB:
3128 case Op_LShiftVB:
3129 case Op_RShiftVB:
3130 case Op_URShiftVB:
3131 case Op_VectorInsert:
3132 case Op_VectorLoadMask:
3133 case Op_VectorStoreMask:
3134 case Op_VectorBlend:
3135 if (UseSSE < 4) {
3136 return false;
3137 }
3138 break;
3139 case Op_MaxD:
3140 case Op_MaxF:
3141 case Op_MinD:
3142 case Op_MinF:
3143 if (UseAVX < 1) { // enabled for AVX only
3144 return false;
3145 }
3146 break;
3147 case Op_CacheWB:
3148 case Op_CacheWBPreSync:
3149 case Op_CacheWBPostSync:
3150 if (!VM_Version::supports_data_cache_line_flush()) {
3151 return false;
3152 }
3153 break;
3154 case Op_ExtractB:
3155 case Op_ExtractL:
3156 case Op_ExtractI:
3157 case Op_RoundDoubleMode:
3158 if (UseSSE < 4) {
3159 return false;
3160 }
3161 break;
3162 case Op_RoundDoubleModeV:
      if (!VM_Version::supports_avx()) {
3164 return false; // 128bit vroundpd is not available
3165 }
3166 break;
3167 case Op_LoadVectorGather:
3168 case Op_LoadVectorGatherMasked:
3169 if (UseAVX < 2) {
3170 return false;
3171 }
3172 break;
3173 case Op_FmaF:
3174 case Op_FmaD:
3175 case Op_FmaVD:
3176 case Op_FmaVF:
3177 if (!UseFMA) {
3178 return false;
3179 }
3180 break;
3181 case Op_MacroLogicV:
3182 if (UseAVX < 3 || !UseVectorMacroLogic) {
3183 return false;
3184 }
3185 break;
3186
3187 case Op_VectorCmpMasked:
3188 case Op_VectorMaskGen:
3189 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3190 return false;
3191 }
3192 break;
3193 case Op_VectorMaskFirstTrue:
3194 case Op_VectorMaskLastTrue:
3195 case Op_VectorMaskTrueCount:
3196 case Op_VectorMaskToLong:
3197 if (UseAVX < 1) {
3198 return false;
3199 }
3200 break;
3201 case Op_RoundF:
3202 case Op_RoundD:
3203 break;
3204 case Op_CopySignD:
3205 case Op_CopySignF:
3206 if (UseAVX < 3) {
3207 return false;
3208 }
3209 if (!VM_Version::supports_avx512vl()) {
3210 return false;
3211 }
3212 break;
3213 case Op_CompressBits:
3214 case Op_ExpandBits:
3215 if (!VM_Version::supports_bmi2()) {
3216 return false;
3217 }
3218 break;
3219 case Op_CompressM:
3220 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3221 return false;
3222 }
3223 break;
3224 case Op_ConvF2HF:
3225 case Op_ConvHF2F:
3226 if (!VM_Version::supports_float16()) {
3227 return false;
3228 }
3229 break;
3230 case Op_VectorCastF2HF:
3231 case Op_VectorCastHF2F:
3232 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3233 return false;
3234 }
3235 break;
3236 }
3237 return true; // Match rules are supported by default.
3238 }
3239
3240 //------------------------------------------------------------------------
3241
3242 static inline bool is_pop_count_instr_target(BasicType bt) {
3243 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3244 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3245 }
3246
3247 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3248 return match_rule_supported_vector(opcode, vlen, bt);
3249 }
3250
3251 // Identify extra cases that we might want to provide match rules for vector nodes and
3252 // other intrinsics guarded with vector length (vlen) and element type (bt).
3253 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3254 if (!match_rule_supported(opcode)) {
3255 return false;
3256 }
3257 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3258 // * SSE2 supports 128bit vectors for all types;
3259 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3260 // * AVX2 supports 256bit vectors for all types;
3261 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3262 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3263 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3264 // And MaxVectorSize is taken into account as well.
3265 if (!vector_size_supported(bt, vlen)) {
3266 return false;
3267 }
3268 // Special cases which require vector length follow:
3269 // * implementation limitations
3270 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3271 // * 128bit vroundpd instruction is present only in AVX1
3272 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3273 switch (opcode) {
3274 case Op_MaxVHF:
3275 case Op_MinVHF:
3276 if (!VM_Version::supports_avx512bw()) {
3277 return false;
3278 }
3279 case Op_AddVHF:
3280 case Op_DivVHF:
3281 case Op_FmaVHF:
3282 case Op_MulVHF:
3283 case Op_SubVHF:
3284 case Op_SqrtVHF:
3285 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3286 return false;
3287 }
3288 if (!VM_Version::supports_avx512_fp16()) {
3289 return false;
3290 }
3291 break;
3292 case Op_AbsVF:
3293 case Op_NegVF:
      if ((vlen == 16) && !VM_Version::supports_avx512dq()) {
3295 return false; // 512bit vandps and vxorps are not available
3296 }
3297 break;
3298 case Op_AbsVD:
3299 case Op_NegVD:
      if ((vlen == 8) && !VM_Version::supports_avx512dq()) {
3301 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3302 }
3303 break;
3304 case Op_RotateRightV:
3305 case Op_RotateLeftV:
3306 if (bt != T_INT && bt != T_LONG) {
3307 return false;
3308 } // fallthrough
3309 case Op_MacroLogicV:
3310 if (!VM_Version::supports_evex() ||
3311 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3312 return false;
3313 }
3314 break;
3315 case Op_ClearArray:
3316 case Op_VectorMaskGen:
3317 case Op_VectorCmpMasked:
3318 if (!VM_Version::supports_avx512bw()) {
3319 return false;
3320 }
3321 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3322 return false;
3323 }
3324 break;
3325 case Op_LoadVectorMasked:
3326 case Op_StoreVectorMasked:
3327 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3328 return false;
3329 }
3330 break;
3331 case Op_UMinV:
3332 case Op_UMaxV:
3333 if (UseAVX == 0) {
3334 return false;
3335 }
3336 break;
3337 case Op_UMinReductionV:
3338 case Op_UMaxReductionV:
3339 if (UseAVX == 0) {
3340 return false;
3341 }
3342 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3343 return false;
3344 }
3345 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3346 return false;
3347 }
3348 break;
3349 case Op_MaxV:
3350 case Op_MinV:
3351 if (UseSSE < 4 && is_integral_type(bt)) {
3352 return false;
3353 }
3354 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3355 // Float/Double intrinsics are enabled for AVX family currently.
3356 if (UseAVX == 0) {
3357 return false;
3358 }
3359 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3360 return false;
3361 }
3362 }
3363 break;
3364 case Op_CallLeafVector:
3365 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3366 return false;
3367 }
3368 break;
3369 case Op_AddReductionVI:
3370 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3371 return false;
3372 }
3373 // fallthrough
3374 case Op_AndReductionV:
3375 case Op_OrReductionV:
3376 case Op_XorReductionV:
3377 if (is_subword_type(bt) && (UseSSE < 4)) {
3378 return false;
3379 }
3380 break;
3381 case Op_MinReductionV:
3382 case Op_MaxReductionV:
3383 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3384 return false;
3385 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3386 return false;
3387 }
3388 // Float/Double intrinsics enabled for AVX family.
3389 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3390 return false;
3391 }
3392 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3393 return false;
3394 }
3395 break;
3396 case Op_VectorBlend:
3397 if (UseAVX == 0 && size_in_bits < 128) {
3398 return false;
3399 }
3400 break;
3401 case Op_VectorTest:
3402 if (UseSSE < 4) {
3403 return false; // Implementation limitation
3404 } else if (size_in_bits < 32) {
3405 return false; // Implementation limitation
3406 }
3407 break;
3408 case Op_VectorLoadShuffle:
3409 case Op_VectorRearrange:
      if (vlen == 2) {
3411 return false; // Implementation limitation due to how shuffle is loaded
3412 } else if (size_in_bits == 256 && UseAVX < 2) {
3413 return false; // Implementation limitation
3414 }
3415 break;
3416 case Op_VectorLoadMask:
3417 case Op_VectorMaskCast:
3418 if (size_in_bits == 256 && UseAVX < 2) {
3419 return false; // Implementation limitation
3420 }
3421 // fallthrough
3422 case Op_VectorStoreMask:
3423 if (vlen == 2) {
3424 return false; // Implementation limitation
3425 }
3426 break;
3427 case Op_PopulateIndex:
3428 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3429 return false;
3430 }
3431 break;
3432 case Op_VectorCastB2X:
3433 case Op_VectorCastS2X:
3434 case Op_VectorCastI2X:
3435 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3436 return false;
3437 }
3438 break;
3439 case Op_VectorCastL2X:
3440 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3441 return false;
3442 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3443 return false;
3444 }
3445 break;
3446 case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversions to sub-word types
      // happen after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3450 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3451 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3452 return false;
3453 }
3454 }
3455 // fallthrough
3456 case Op_VectorCastD2X:
3457 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3458 return false;
3459 }
3460 break;
3461 case Op_VectorCastF2HF:
3462 case Op_VectorCastHF2F:
3463 if (!VM_Version::supports_f16c() &&
3464 ((!VM_Version::supports_evex() ||
3465 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3466 return false;
3467 }
3468 break;
3469 case Op_RoundVD:
3470 if (!VM_Version::supports_avx512dq()) {
3471 return false;
3472 }
3473 break;
3474 case Op_MulReductionVI:
3475 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3476 return false;
3477 }
3478 break;
3479 case Op_LoadVectorGatherMasked:
3480 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3481 return false;
3482 }
3483 if (is_subword_type(bt) &&
3484 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3485 (size_in_bits < 64) ||
3486 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3487 return false;
3488 }
3489 break;
3490 case Op_StoreVectorScatterMasked:
3491 case Op_StoreVectorScatter:
3492 if (is_subword_type(bt)) {
3493 return false;
3494 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3495 return false;
3496 }
3497 // fallthrough
3498 case Op_LoadVectorGather:
3499 if (!is_subword_type(bt) && size_in_bits == 64) {
3500 return false;
3501 }
3502 if (is_subword_type(bt) && size_in_bits < 64) {
3503 return false;
3504 }
3505 break;
3506 case Op_SaturatingAddV:
3507 case Op_SaturatingSubV:
3508 if (UseAVX < 1) {
3509 return false; // Implementation limitation
3510 }
3511 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3512 return false;
3513 }
3514 break;
3515 case Op_SelectFromTwoVector:
3516 if (size_in_bits < 128) {
3517 return false;
3518 }
3519 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3520 return false;
3521 }
3522 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3523 return false;
3524 }
3525 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3526 return false;
3527 }
3528 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3529 return false;
3530 }
3531 break;
3532 case Op_MaskAll:
3533 if (!VM_Version::supports_evex()) {
3534 return false;
3535 }
3536 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3537 return false;
3538 }
3539 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3540 return false;
3541 }
3542 break;
3543 case Op_VectorMaskCmp:
3544 if (vlen < 2 || size_in_bits < 32) {
3545 return false;
3546 }
3547 break;
3548 case Op_CompressM:
3549 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3550 return false;
3551 }
3552 break;
3553 case Op_CompressV:
3554 case Op_ExpandV:
3555 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3556 return false;
3557 }
      if (size_in_bits < 128) {
        return false;
      }
      // fallthrough
3561 case Op_VectorLongToMask:
3562 if (UseAVX < 1) {
3563 return false;
3564 }
3565 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3566 return false;
3567 }
3568 break;
3569 case Op_SignumVD:
3570 case Op_SignumVF:
3571 if (UseAVX < 1) {
3572 return false;
3573 }
3574 break;
3575 case Op_PopCountVI:
3576 case Op_PopCountVL: {
3577 if (!is_pop_count_instr_target(bt) &&
3578 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3579 return false;
3580 }
3581 }
3582 break;
3583 case Op_ReverseV:
3584 case Op_ReverseBytesV:
3585 if (UseAVX < 2) {
3586 return false;
3587 }
3588 break;
3589 case Op_CountTrailingZerosV:
3590 case Op_CountLeadingZerosV:
3591 if (UseAVX < 2) {
3592 return false;
3593 }
3594 break;
3595 }
  return true; // Match rules are supported by default.
3597 }
3598
3599 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most unary/binary/ternary masked operations
  // share the IR nodes of their non-masked counterparts, with the mask edge
  // being the differentiator. This routine therefore does a strict check for
  // the existence of masked operation patterns, returning false for all
  // opcodes other than those whose masked patterns are defined in this file.
3606 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3607 return false;
3608 }
3609
3610 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3611 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3612 return false;
3613 }
  switch (opcode) {
3615 // Unary masked operations
3616 case Op_AbsVB:
3617 case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      // fallthrough
3621 case Op_AbsVI:
3622 case Op_AbsVL:
3623 return true;
3624
3625 // Ternary masked operations
3626 case Op_FmaVF:
3627 case Op_FmaVD:
3628 return true;
3629
3630 case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
3632 return false;
3633 }
3634 return true;
3635
3636 // Binary masked operations
3637 case Op_AddVB:
3638 case Op_AddVS:
3639 case Op_SubVB:
3640 case Op_SubVS:
3641 case Op_MulVS:
3642 case Op_LShiftVS:
3643 case Op_RShiftVS:
3644 case Op_URShiftVS:
3645 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3646 if (!VM_Version::supports_avx512bw()) {
3647 return false; // Implementation limitation
3648 }
3649 return true;
3650
3651 case Op_MulVL:
3652 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3653 if (!VM_Version::supports_avx512dq()) {
3654 return false; // Implementation limitation
3655 }
3656 return true;
3657
3658 case Op_AndV:
3659 case Op_OrV:
3660 case Op_XorV:
3661 case Op_RotateRightV:
3662 case Op_RotateLeftV:
3663 if (bt != T_INT && bt != T_LONG) {
3664 return false; // Implementation limitation
3665 }
3666 return true;
3667
3668 case Op_VectorLoadMask:
3669 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3670 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3671 return false;
3672 }
3673 return true;
3674
3675 case Op_AddVI:
3676 case Op_AddVL:
3677 case Op_AddVF:
3678 case Op_AddVD:
3679 case Op_SubVI:
3680 case Op_SubVL:
3681 case Op_SubVF:
3682 case Op_SubVD:
3683 case Op_MulVI:
3684 case Op_MulVF:
3685 case Op_MulVD:
3686 case Op_DivVF:
3687 case Op_DivVD:
3688 case Op_SqrtVF:
3689 case Op_SqrtVD:
3690 case Op_LShiftVI:
3691 case Op_LShiftVL:
3692 case Op_RShiftVI:
3693 case Op_RShiftVL:
3694 case Op_URShiftVI:
3695 case Op_URShiftVL:
3696 case Op_LoadVectorMasked:
3697 case Op_StoreVectorMasked:
3698 case Op_LoadVectorGatherMasked:
3699 case Op_StoreVectorScatterMasked:
3700 return true;
3701
3702 case Op_UMinV:
3703 case Op_UMaxV:
3704 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3705 return false;
3706 } // fallthrough
3707 case Op_MaxV:
3708 case Op_MinV:
3709 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3710 return false; // Implementation limitation
3711 }
3712 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3713 return false; // Implementation limitation
3714 }
3715 return true;
3716 case Op_SaturatingAddV:
3717 case Op_SaturatingSubV:
3718 if (!is_subword_type(bt)) {
3719 return false;
3720 }
3721 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3722 return false; // Implementation limitation
3723 }
3724 return true;
3725
3726 case Op_VectorMaskCmp:
3727 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3728 return false; // Implementation limitation
3729 }
3730 return true;
3731
3732 case Op_VectorRearrange:
3733 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3734 return false; // Implementation limitation
3735 }
3736 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3737 return false; // Implementation limitation
3738 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3739 return false; // Implementation limitation
3740 }
3741 return true;
3742
3743 // Binary Logical operations
3744 case Op_AndVMask:
3745 case Op_OrVMask:
3746 case Op_XorVMask:
3747 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3748 return false; // Implementation limitation
3749 }
3750 return true;
3751
3752 case Op_PopCountVI:
3753 case Op_PopCountVL:
3754 if (!is_pop_count_instr_target(bt)) {
3755 return false;
3756 }
3757 return true;
3758
3759 case Op_MaskAll:
3760 return true;
3761
3762 case Op_CountLeadingZerosV:
3763 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3764 return true;
3765 }
3766 default:
3767 return false;
3768 }
3769 }
3770
3771 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3772 return false;
3773 }
3774
3775 // Return true if Vector::rearrange needs preparation of the shuffle argument
3776 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3777 switch (elem_bt) {
3778 case T_BYTE: return false;
3779 case T_SHORT: return !VM_Version::supports_avx512bw();
3780 case T_INT: return !VM_Version::supports_avx();
3781 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3782 default:
3783 ShouldNotReachHere();
3784 return false;
3785 }
3786 }
3787
3788 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3789 // Prefer predicate if the mask type is "TypeVectMask".
3790 return vt->isa_vectmask() != nullptr;
3791 }
3792
3793 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3794 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3795 bool legacy = (generic_opnd->opcode() == LEGVEC);
3796 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3797 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3798 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3799 return new legVecZOper();
3800 }
3801 if (legacy) {
3802 switch (ideal_reg) {
3803 case Op_VecS: return new legVecSOper();
3804 case Op_VecD: return new legVecDOper();
3805 case Op_VecX: return new legVecXOper();
3806 case Op_VecY: return new legVecYOper();
3807 case Op_VecZ: return new legVecZOper();
3808 }
3809 } else {
3810 switch (ideal_reg) {
3811 case Op_VecS: return new vecSOper();
3812 case Op_VecD: return new vecDOper();
3813 case Op_VecX: return new vecXOper();
3814 case Op_VecY: return new vecYOper();
3815 case Op_VecZ: return new vecZOper();
3816 }
3817 }
3818 ShouldNotReachHere();
3819 return nullptr;
3820 }
3821
3822 bool Matcher::is_reg2reg_move(MachNode* m) {
3823 switch (m->rule()) {
3824 case MoveVec2Leg_rule:
3825 case MoveLeg2Vec_rule:
3826 case MoveF2VL_rule:
3827 case MoveF2LEG_rule:
3828 case MoveVL2F_rule:
3829 case MoveLEG2F_rule:
3830 case MoveD2VL_rule:
3831 case MoveD2LEG_rule:
3832 case MoveVL2D_rule:
3833 case MoveLEG2D_rule:
3834 return true;
3835 default:
3836 return false;
3837 }
3838 }
3839
3840 bool Matcher::is_generic_vector(MachOper* opnd) {
3841 switch (opnd->opcode()) {
3842 case VEC:
3843 case LEGVEC:
3844 return true;
3845 default:
3846 return false;
3847 }
3848 }
3849
3850 //------------------------------------------------------------------------
3851
3852 const RegMask* Matcher::predicate_reg_mask(void) {
3853 return &_VECTMASK_REG_mask;
3854 }
3855
3856 // Max vector size in bytes. 0 if not supported.
3857 int Matcher::vector_width_in_bytes(BasicType bt) {
3858 assert(is_java_primitive(bt), "only primitive type vectors");
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types;
  // subword types additionally require AVX512BW.
3862 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3863 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3864 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3865 size = (UseAVX > 2) ? 64 : 32;
3866 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3867 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3868 // Use flag to limit vector size.
3869 size = MIN2(size,(int)MaxVectorSize);
3870 // Minimum 2 values in vector (or 4 for bytes).
3871 switch (bt) {
3872 case T_DOUBLE:
3873 case T_LONG:
3874 if (size < 16) return 0;
3875 break;
3876 case T_FLOAT:
3877 case T_INT:
3878 if (size < 8) return 0;
3879 break;
3880 case T_BOOLEAN:
3881 if (size < 4) return 0;
3882 break;
3883 case T_CHAR:
3884 if (size < 4) return 0;
3885 break;
3886 case T_BYTE:
3887 if (size < 4) return 0;
3888 break;
3889 case T_SHORT:
3890 if (size < 4) return 0;
3891 break;
3892 default:
3893 ShouldNotReachHere();
3894 }
3895 return size;
3896 }
3897
3898 // Limits on vector size (number of elements) loaded into vector.
3899 int Matcher::max_vector_size(const BasicType bt) {
3900 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3901 }
3902 int Matcher::min_vector_size(const BasicType bt) {
3903 int max_size = max_vector_size(bt);
  // Min size which can be loaded into a vector is 4 bytes:
  // 4 elements for byte, otherwise 2 elements.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Allow 1-element (64-bit) double vectors to support SVML calls.
3907 if (bt == T_DOUBLE) {
3908 size = 1;
3909 }
3910 return MIN2(size,max_size);
3911 }
3912
3913 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3914 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3915 // by default on Cascade Lake
3916 if (VM_Version::is_default_intel_cascade_lake()) {
3917 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3918 }
3919 return Matcher::max_vector_size(bt);
3920 }
3921
3922 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3923 return -1;
3924 }
3925
3926 // Vector ideal reg corresponding to specified size in bytes
3927 uint Matcher::vector_ideal_reg(int size) {
3928 assert(MaxVectorSize >= size, "");
  switch (size) {
3930 case 4: return Op_VecS;
3931 case 8: return Op_VecD;
3932 case 16: return Op_VecX;
3933 case 32: return Op_VecY;
3934 case 64: return Op_VecZ;
3935 }
3936 ShouldNotReachHere();
3937 return 0;
3938 }
3939
3940 // Check for shift by small constant as well
3941 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3942 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3943 shift->in(2)->get_int() <= 3 &&
3944 // Are there other uses besides address expressions?
3945 !matcher->is_visited(shift)) {
3946 address_visited.set(shift->_idx); // Flag as address_visited
3947 mstack.push(shift->in(2), Matcher::Visit);
3948 Node *conv = shift->in(1);
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
3952 if (conv->Opcode() == Op_ConvI2L &&
3953 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3954 // Are there other uses besides address expressions?
3955 !matcher->is_visited(conv)) {
3956 address_visited.set(conv->_idx); // Flag as address_visited
3957 mstack.push(conv->in(1), Matcher::Pre_Visit);
3958 } else {
3959 mstack.push(conv, Matcher::Pre_Visit);
3960 }
3961 return true;
3962 }
3963 return false;
3964 }
3965
// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
3972 //
3973 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3974 // This is a temporary solution until we make DAGs expressible in ADL.
3975 template<typename ConType>
3976 class FusedPatternMatcher {
3977 Node* _op1_node;
3978 Node* _mop_node;
3979 int _con_op;
3980
3981 static int match_next(Node* n, int next_op, int next_op_idx) {
3982 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3983 return -1;
3984 }
3985
3986 if (next_op_idx == -1) { // n is commutative, try rotations
3987 if (n->in(1)->Opcode() == next_op) {
3988 return 1;
3989 } else if (n->in(2)->Opcode() == next_op) {
3990 return 2;
3991 }
3992 } else {
3993 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3994 if (n->in(next_op_idx)->Opcode() == next_op) {
3995 return next_op_idx;
3996 }
3997 }
3998 return -1;
3999 }
4000
4001 public:
4002 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4003 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4004
4005 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4006 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4007 typename ConType::NativeType con_value) {
4008 if (_op1_node->Opcode() != op1) {
4009 return false;
4010 }
4011 if (_mop_node->outcnt() > 2) {
4012 return false;
4013 }
4014 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4015 if (op1_op2_idx == -1) {
4016 return false;
4017 }
4018 // Memory operation must be the other edge
4019 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4020
4021 // Check that the mop node is really what we want
4022 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4023 Node* op2_node = _op1_node->in(op1_op2_idx);
4024 if (op2_node->outcnt() > 1) {
4025 return false;
4026 }
4027 assert(op2_node->Opcode() == op2, "Should be");
4028 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4029 if (op2_con_idx == -1) {
4030 return false;
4031 }
4032 // Memory operation must be the other edge
4033 int op2_mop_idx = (op2_con_idx & 1) + 1;
4034 // Check that the memory operation is the same node
4035 if (op2_node->in(op2_mop_idx) == _mop_node) {
4036 // Now check the constant
4037 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4038 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4039 return true;
4040 }
4041 }
4042 }
4043 return false;
4044 }
4045 };
4046
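// The BMI patterns recognized below map to instructions as follows
// (illustrative):
//   (AndI (SubI 0 x) x)  -> blsi   (isolate lowest set bit:    x & -x)
//   (AndI (AddI x -1) x) -> blsr   (clear lowest set bit:      x & (x - 1))
//   (XorI (AddI x -1) x) -> blsmsk (mask up to lowest set bit: x ^ (x - 1))
// with the analogous long (LoadL/ConL) forms.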
4047 static bool is_bmi_pattern(Node* n, Node* m) {
4048 assert(UseBMI1Instructions, "sanity");
4049 if (n != nullptr && m != nullptr) {
4050 if (m->Opcode() == Op_LoadI) {
4051 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4052 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4053 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4054 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4055 } else if (m->Opcode() == Op_LoadL) {
4056 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4057 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4058 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4059 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4060 }
4061 }
4062 return false;
4063 }
4064
4065 // Should the matcher clone input 'm' of node 'n'?
4066 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4067 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4068 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4069 mstack.push(m, Visit);
4070 return true;
4071 }
4072 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4073 mstack.push(m, Visit); // m = ShiftCntV
4074 return true;
4075 }
4076 if (is_encode_and_store_pattern(n, m)) {
4077 mstack.push(m, Visit);
4078 return true;
4079 }
4080 return false;
4081 }
4082
4083 // Should the Matcher clone shifts on addressing modes, expecting them
4084 // to be subsumed into complex addressing expressions or compute them
4085 // into registers?
4086 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4087 Node *off = m->in(AddPNode::Offset);
4088 if (off->is_Con()) {
4089 address_visited.test_set(m->_idx); // Flag as address_visited
4090 Node *adr = m->in(AddPNode::Address);
4091
4092 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4093 // AtomicAdd is not an addressing expression.
4094 // Cheap to find it by looking for screwy base.
4095 if (adr->is_AddP() &&
4096 !adr->in(AddPNode::Base)->is_top() &&
4097 !adr->in(AddPNode::Offset)->is_Con() &&
4098 off->get_long() == (int) (off->get_long()) && // immL32
4099 // Are there other uses besides address expressions?
4100 !is_visited(adr)) {
4101 address_visited.set(adr->_idx); // Flag as address_visited
4102 Node *shift = adr->in(AddPNode::Offset);
4103 if (!clone_shift(shift, this, mstack, address_visited)) {
4104 mstack.push(shift, Pre_Visit);
4105 }
4106 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4107 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4108 } else {
4109 mstack.push(adr, Pre_Visit);
4110 }
4111
4112 // Clone X+offset as it also folds into most addressing expressions
4113 mstack.push(off, Visit);
4114 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4115 return true;
4116 } else if (clone_shift(off, this, mstack, address_visited)) {
4117 address_visited.test_set(m->_idx); // Flag as address_visited
4118 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4119 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4120 return true;
4121 }
4122 return false;
4123 }
4124
4125 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4126 switch (bt) {
4127 case BoolTest::eq:
4128 return Assembler::eq;
4129 case BoolTest::ne:
4130 return Assembler::neq;
4131 case BoolTest::le:
4132 case BoolTest::ule:
4133 return Assembler::le;
4134 case BoolTest::ge:
4135 case BoolTest::uge:
4136 return Assembler::nlt;
4137 case BoolTest::lt:
4138 case BoolTest::ult:
4139 return Assembler::lt;
4140 case BoolTest::gt:
4141 case BoolTest::ugt:
4142 return Assembler::nle;
4143 default : ShouldNotReachHere(); return Assembler::_false;
4144 }
4145 }
4146
4147 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4148 switch (bt) {
4149 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4150 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4151 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4152 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4153 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4154 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4155 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4156 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4157 }
4158 }
4159
4160 // Helper methods for MachSpillCopyNode::implementation().
4161 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4162 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4163 assert(ireg == Op_VecS || // 32bit vector
4164 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4165 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4166 "no non-adjacent vector moves" );
4167 if (masm) {
4168 switch (ireg) {
4169 case Op_VecS: // copy whole register
4170 case Op_VecD:
4171 case Op_VecX:
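      // Without AVX512VL, xmm16-31 are only reachable via 512-bit EVEX
      // instructions, so the copy goes through a 128-bit extract of lane 0
      // instead of movdqu, which cannot encode the extended registers.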
4172 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4173 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4174 } else {
4175 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4176 }
4177 break;
4178 case Op_VecY:
4179 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4180 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4181 } else {
4182 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4183 }
4184 break;
4185 case Op_VecZ:
4186 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4187 break;
4188 default:
4189 ShouldNotReachHere();
4190 }
4191 #ifndef PRODUCT
4192 } else {
4193 switch (ireg) {
4194 case Op_VecS:
4195 case Op_VecD:
4196 case Op_VecX:
4197 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4198 break;
4199 case Op_VecY:
4200 case Op_VecZ:
4201 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4202 break;
4203 default:
4204 ShouldNotReachHere();
4205 }
4206 #endif
4207 }
4208 }
4209
4210 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4211 int stack_offset, int reg, uint ireg, outputStream* st) {
4212 if (masm) {
4213 if (is_load) {
4214 switch (ireg) {
4215 case Op_VecS:
4216 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4217 break;
4218 case Op_VecD:
4219 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4220 break;
4221 case Op_VecX:
4222 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4223 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4224 } else {
4225 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4226 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4227 }
4228 break;
4229 case Op_VecY:
4230 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4231 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4232 } else {
4233 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4234 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4235 }
4236 break;
4237 case Op_VecZ:
4238 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4239 break;
4240 default:
4241 ShouldNotReachHere();
4242 }
4243 } else { // store
4244 switch (ireg) {
4245 case Op_VecS:
4246 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4247 break;
4248 case Op_VecD:
4249 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4250 break;
4251 case Op_VecX:
4252 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4253 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4254 }
4255 else {
4256 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4257 }
4258 break;
4259 case Op_VecY:
4260 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4261 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4262 }
4263 else {
4264 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4265 }
4266 break;
4267 case Op_VecZ:
4268 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4269 break;
4270 default:
4271 ShouldNotReachHere();
4272 }
4273 }
4274 #ifndef PRODUCT
4275 } else {
4276 if (is_load) {
4277 switch (ireg) {
4278 case Op_VecS:
4279 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4280 break;
4281 case Op_VecD:
4282 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4283 break;
4284 case Op_VecX:
4285 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4286 break;
4287 case Op_VecY:
4288 case Op_VecZ:
4289 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4290 break;
4291 default:
4292 ShouldNotReachHere();
4293 }
4294 } else { // store
4295 switch (ireg) {
4296 case Op_VecS:
4297 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4298 break;
4299 case Op_VecD:
4300 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4301 break;
4302 case Op_VecX:
4303 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4304 break;
4305 case Op_VecY:
4306 case Op_VecZ:
4307 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4308 break;
4309 default:
4310 ShouldNotReachHere();
4311 }
4312 }
4313 #endif
4314 }
4315 }
4316
4317 template <class T>
4318 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4319 int size = type2aelembytes(bt) * len;
4320 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4321 for (int i = 0; i < len; i++) {
4322 int offset = i * type2aelembytes(bt);
4323 switch (bt) {
4324 case T_BYTE: val->at(i) = con; break;
4325 case T_SHORT: {
4326 jshort c = con;
4327 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4328 break;
4329 }
4330 case T_INT: {
4331 jint c = con;
4332 memcpy(val->adr_at(offset), &c, sizeof(jint));
4333 break;
4334 }
4335 case T_LONG: {
4336 jlong c = con;
4337 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4338 break;
4339 }
4340 case T_FLOAT: {
4341 jfloat c = con;
4342 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4343 break;
4344 }
4345 case T_DOUBLE: {
4346 jdouble c = con;
4347 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4348 break;
4349 }
4350 default: assert(false, "%s", type2name(bt));
4351 }
4352 }
4353 return val;
4354 }
4355
4356 static inline jlong high_bit_set(BasicType bt) {
4357 switch (bt) {
4358 case T_BYTE: return 0x8080808080808080;
4359 case T_SHORT: return 0x8000800080008000;
4360 case T_INT: return 0x8000000080000000;
4361 case T_LONG: return 0x8000000000000000;
4362 default:
4363 ShouldNotReachHere();
4364 return 0;
4365 }
4366 }
4367
4368 #ifndef PRODUCT
4369 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4370 st->print("nop \t# %d bytes pad for loops and calls", _count);
4371 }
4372 #endif
4373
4374 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4375 __ nop(_count);
4376 }
4377
4378 uint MachNopNode::size(PhaseRegAlloc*) const {
4379 return _count;
4380 }
4381
4382 #ifndef PRODUCT
4383 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4384 st->print("# breakpoint");
4385 }
4386 #endif
4387
4388 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4389 __ int3();
4390 }
4391
4392 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4393 return MachNode::size(ra_);
4394 }
4395
4396 %}
4397
4398 //----------ENCODING BLOCK-----------------------------------------------------
4399 // This block specifies the encoding classes used by the compiler to
4400 // output byte streams. Encoding classes are parameterized macros
4401 // used by Machine Instruction Nodes in order to generate the bit
4402 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword. Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER. REG_INTER causes an operand to generate a function
4406 // which returns its register number when queried. CONST_INTER causes
4407 // an operand to generate a function which returns the value of the
4408 // constant when queried. MEMORY_INTER causes an operand to generate
4409 // four functions which return the Base Register, the Index Register,
4410 // the Scale Value, and the Offset Value of the operand when queried.
4411 // COND_INTER causes an operand to generate six functions which return
4412 // the encoding code (ie - encoding bits for the instruction)
4413 // associated with each basic boolean condition for a conditional
4414 // instruction.
4415 //
// Instructions specify two basic values for encoding. A function is
// also available to check whether a constant displacement is an
// oop. Instructions use the ins_encode keyword to specify their encoding
4419 // classes (which must be a sequence of enc_class names, and their
4420 // parameters, specified in the encoding block), and they use the
4421 // opcode keyword to specify, in order, their primary, secondary, and
4422 // tertiary opcode. Only the opcode sections which a particular
4423 // instruction needs for encoding need to be specified.
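//
// For example (an illustrative sketch only; load_con_enc and myLoadConI are
// hypothetical names, not encodings or rules defined in this file):
//
//   enc_class load_con_enc(rRegI dst, immI src)
//   %{
//     __ movl($dst$$Register, $src$$constant);
//   %}
//
//   instruct myLoadConI(rRegI dst, immI src)
//   %{
//     match(Set dst src);
//     ins_encode(load_con_enc(dst, src));
//     ins_pipe(ialu_reg);
//   %}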
4424 encode %{
4425 enc_class cdql_enc(no_rax_rdx_RegI div)
4426 %{
4427 // Full implementation of Java idiv and irem; checks for
4428 // special case as described in JVM spec., p.243 & p.271.
4429 //
4430 // normal case special case
4431 //
4432 // input : rax: dividend min_int
4433 // reg: divisor -1
4434 //
4435 // output: rax: quotient (= rax idiv reg) min_int
4436 // rdx: remainder (= rax irem reg) 0
4437 //
    // Code sequence:
4439 //
4440 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4441 // 5: 75 07/08 jne e <normal>
4442 // 7: 33 d2 xor %edx,%edx
4443 // [div >= 8 -> offset + 1]
4444 // [REX_B]
4445 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4446 // c: 74 03/04 je 11 <done>
4447 // 000000000000000e <normal>:
4448 // e: 99 cltd
4449 // [div >= 8 -> offset + 1]
4450 // [REX_B]
4451 // f: f7 f9 idiv $div
4452 // 0000000000000011 <done>:
4453 Label normal;
4454 Label done;
4455
4456 // cmp $0x80000000,%eax
4457 __ cmpl(as_Register(RAX_enc), 0x80000000);
4458
4459 // jne e <normal>
4460 __ jccb(Assembler::notEqual, normal);
4461
4462 // xor %edx,%edx
4463 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4464
    // cmp $0xffffffffffffffff,$div
4466 __ cmpl($div$$Register, -1);
4467
4468 // je 11 <done>
4469 __ jccb(Assembler::equal, done);
4470
4471 // <normal>
4472 // cltd
4473 __ bind(normal);
4474 __ cdql();
4475
4476 // idivl
4477 // <done>
4478 __ idivl($div$$Register);
4479 __ bind(done);
4480 %}
4481
4482 enc_class cdqq_enc(no_rax_rdx_RegL div)
4483 %{
4484 // Full implementation of Java ldiv and lrem; checks for
4485 // special case as described in JVM spec., p.243 & p.271.
4486 //
4487 // normal case special case
4488 //
4489 // input : rax: dividend min_long
4490 // reg: divisor -1
4491 //
4492 // output: rax: quotient (= rax idiv reg) min_long
4493 // rdx: remainder (= rax irem reg) 0
4494 //
    // Code sequence:
4496 //
4497 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4498 // 7: 00 00 80
4499 // a: 48 39 d0 cmp %rdx,%rax
4500 // d: 75 08 jne 17 <normal>
4501 // f: 33 d2 xor %edx,%edx
4502 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4503 // 15: 74 05 je 1c <done>
4504 // 0000000000000017 <normal>:
4505 // 17: 48 99 cqto
4506 // 19: 48 f7 f9 idiv $div
4507 // 000000000000001c <done>:
4508 Label normal;
4509 Label done;
4510
4511 // mov $0x8000000000000000,%rdx
4512 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4513
4514 // cmp %rdx,%rax
4515 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4516
4517 // jne 17 <normal>
4518 __ jccb(Assembler::notEqual, normal);
4519
4520 // xor %edx,%edx
4521 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4522
4523 // cmp $0xffffffffffffffff,$div
4524 __ cmpq($div$$Register, -1);
4525
    // je 1c <done>
4527 __ jccb(Assembler::equal, done);
4528
4529 // <normal>
4530 // cqto
4531 __ bind(normal);
4532 __ cdqq();
4533
    // idivq
4535 // <done>
4536 __ idivq($div$$Register);
4537 __ bind(done);
4538 %}
4539
4540 enc_class clear_avx %{
4541 DEBUG_ONLY(int off0 = __ offset());
4542 if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when the current compiled code uses
      // wide vectors, to avoid the AVX <-> SSE transition penalty during the call.
4546 __ vzeroupper();
4547 }
4548 DEBUG_ONLY(int off1 = __ offset());
4549 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4550 %}
4551
4552 enc_class Java_To_Runtime(method meth) %{
4553 __ lea(r10, RuntimeAddress((address)$meth$$method));
4554 __ call(r10);
4555 __ post_call_nop();
4556 %}
4557
4558 enc_class Java_Static_Call(method meth)
4559 %{
4560 // JAVA STATIC CALL
4561 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4562 // determine who we intended to call.
4563 if (!_method) {
4564 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4565 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4566 // The NOP here is purely to ensure that eliding a call to
4567 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4568 __ addr_nop_5();
4569 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4570 } else {
4571 int method_index = resolved_method_index(masm);
4572 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4573 : static_call_Relocation::spec(method_index);
4574 address mark = __ pc();
4575 int call_offset = __ offset();
4576 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4577 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4578 // Calls of the same statically bound method can share
4579 // a stub to the interpreter.
4580 __ code()->shared_stub_to_interp_for(_method, call_offset);
4581 } else {
4582 // Emit stubs for static call.
4583 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4584 __ clear_inst_mark();
4585 if (stub == nullptr) {
4586 ciEnv::current()->record_failure("CodeCache is full");
4587 return;
4588 }
4589 }
4590 }
4591 __ post_call_nop();
4592 %}
4593
4594 enc_class Java_Dynamic_Call(method meth) %{
4595 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4596 __ post_call_nop();
4597 %}
4598
4599 enc_class call_epilog %{
4600 if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
4602 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4603 Label L;
4604 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4605 __ jccb(Assembler::equal, L);
4606 // Die if stack mismatch
4607 __ int3();
4608 __ bind(L);
4609 }
4610 %}
4611
4612 %}
4613
4614 //----------FRAME--------------------------------------------------------------
4615 // Definition of frame structure and management information.
4616 //
4617 // S T A C K L A Y O U T Allocators stack-slot number
4618 // | (to get allocators register number
4619 // G Owned by | | v add OptoReg::stack0())
4620 // r CALLER | |
4621 // o | +--------+ pad to even-align allocators stack-slot
4622 // w V | pad0 | numbers; owned by CALLER
4623 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4624 // h ^ | in | 5
4625 // | | args | 4 Holes in incoming args owned by SELF
4626 // | | | | 3
4627 // | | +--------+
4628 // V | | old out| Empty on Intel, window on Sparc
4629 // | old |preserve| Must be even aligned.
4630 // | SP-+--------+----> Matcher::_old_SP, even aligned
4631 // | | in | 3 area for Intel ret address
4632 // Owned by |preserve| Empty on Sparc.
4633 // SELF +--------+
4634 // | | pad2 | 2 pad to align old SP
4635 // | +--------+ 1
4636 // | | locks | 0
4637 // | +--------+----> OptoReg::stack0(), even aligned
4638 // | | pad1 | 11 pad to align new SP
4639 // | +--------+
4640 // | | | 10
4641 // | | spills | 9 spills
4642 // V | | 8 (pad0 slot for callee)
4643 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4644 // ^ | out | 7
4645 // | | args | 6 Holes in outgoing args owned by CALLEE
4646 // Owned by +--------+
4647 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4648 // | new |preserve| Must be even-aligned.
4649 // | SP-+--------+----> Matcher::_new_SP, even aligned
4650 // | | |
4651 //
4652 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4653 // known from SELF's arguments and the Java calling convention.
4654 // Region 6-7 is determined per call site.
4655 // Note 2: If the calling convention leaves holes in the incoming argument
4656 // area, those holes are owned by SELF. Holes in the outgoing area
4657 // are owned by the CALLEE. Holes should not be necessary in the
4658 // incoming area, as the Java calling convention is completely under
4659 // the control of the AD file. Doubles can be sorted and packed to
4660 // avoid holes. Holes in the outgoing arguments may be necessary for
4661 // varargs C calling conventions.
4662 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4663 // even aligned with pad0 as needed.
4664 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4665 // region 6-11 is even aligned; it may be padded out more so that
4666 // the region from SP to FP meets the minimum stack alignment.
4667 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4668 // alignment. Region 11, pad1, may be dynamically extended so that
4669 // SP meets the minimum alignment.
4670
4671 frame
4672 %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
4675 inline_cache_reg(RAX); // Inline Cache Register
4676
4677 // Optional: name the operand used by cisc-spilling to access
4678 // [stack_pointer + offset]
4679 cisc_spilling_operand_name(indOffset32);
4680
4681 // Number of stack slots consumed by locking an object
4682 sync_stack_slots(2);
4683
4684 // Compiled code's Frame Pointer
4685 frame_pointer(RSP);
4686
4687 // Stack alignment requirement
4688 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4689
4690 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4691 // for calls to C. Supports the var-args backing area for register parms.
4692 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4693
4694 // The after-PROLOG location of the return address. Location of
4695 // return address specifies a type (REG or STACK) and a number
4696 // representing the register number (i.e. - use a register name) or
4697 // stack slot.
4698 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4699 // Otherwise, it is above the locks and verification slot and alignment word
4700 return_addr(STACK - 2 +
4701 align_up((Compile::current()->in_preserve_stack_slots() +
4702 Compile::current()->fixed_slots()),
4703 stack_alignment_in_slots()));
4704
4705 // Location of compiled Java return values. Same as C for now.
4706 return_value
4707 %{
4708 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4709 "only return normal values");
4710
4711 static const int lo[Op_RegL + 1] = {
4712 0,
4713 0,
4714 RAX_num, // Op_RegN
4715 RAX_num, // Op_RegI
4716 RAX_num, // Op_RegP
4717 XMM0_num, // Op_RegF
4718 XMM0_num, // Op_RegD
4719 RAX_num // Op_RegL
4720 };
4721 static const int hi[Op_RegL + 1] = {
4722 0,
4723 0,
4724 OptoReg::Bad, // Op_RegN
4725 OptoReg::Bad, // Op_RegI
4726 RAX_H_num, // Op_RegP
4727 OptoReg::Bad, // Op_RegF
4728 XMM0b_num, // Op_RegD
4729 RAX_H_num // Op_RegL
4730 };
4731 // Excluded flags and vector registers.
4732 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
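    // For example, ideal_reg == Op_RegL returns OptoRegPair(RAX_H_num, RAX_num):
    // longs come back in RAX, with the high half described by RAX_H.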
4733 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4734 %}
4735 %}
4736
4737 //----------ATTRIBUTES---------------------------------------------------------
4738 //----------Operand Attributes-------------------------------------------------
4739 op_attrib op_cost(0); // Required cost attribute
4740
4741 //----------Instruction Attributes---------------------------------------------
4742 ins_attrib ins_cost(100); // Required cost attribute
4743 ins_attrib ins_size(8); // Required size attribute (in bits)
4744 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4745 // a non-matching short branch variant
4746 // of some long branch?
4747 ins_attrib ins_alignment(1); // Required alignment attribute (must
4748 // be a power of 2) specifies the
4749 // alignment that some part of the
4750 // instruction (not necessarily the
4751 // start) requires. If > 1, a
4752 // compute_padding() function must be
4753 // provided for the instruction
4754
4755 // Whether this node is expanded during code emission into a sequence of
4756 // instructions and the first instruction can perform an implicit null check.
4757 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4758
4759 //----------OPERANDS-----------------------------------------------------------
4760 // Operand definitions must precede instruction definitions for correct parsing
4761 // in the ADLC because operands constitute user defined types which are used in
4762 // instruction definitions.
4763
4764 //----------Simple Operands----------------------------------------------------
4765 // Immediate Operands
4766 // Integer Immediate
4767 operand immI()
4768 %{
4769 match(ConI);
4770
4771 op_cost(10);
4772 format %{ %}
4773 interface(CONST_INTER);
4774 %}
4775
4776 // Constant for test vs zero
4777 operand immI_0()
4778 %{
4779 predicate(n->get_int() == 0);
4780 match(ConI);
4781
4782 op_cost(0);
4783 format %{ %}
4784 interface(CONST_INTER);
4785 %}
4786
4787 // Constant for increment
4788 operand immI_1()
4789 %{
4790 predicate(n->get_int() == 1);
4791 match(ConI);
4792
4793 op_cost(0);
4794 format %{ %}
4795 interface(CONST_INTER);
4796 %}
4797
4798 // Constant for decrement
4799 operand immI_M1()
4800 %{
4801 predicate(n->get_int() == -1);
4802 match(ConI);
4803
4804 op_cost(0);
4805 format %{ %}
4806 interface(CONST_INTER);
4807 %}
4808
4809 operand immI_2()
4810 %{
4811 predicate(n->get_int() == 2);
4812 match(ConI);
4813
4814 op_cost(0);
4815 format %{ %}
4816 interface(CONST_INTER);
4817 %}
4818
4819 operand immI_4()
4820 %{
4821 predicate(n->get_int() == 4);
4822 match(ConI);
4823
4824 op_cost(0);
4825 format %{ %}
4826 interface(CONST_INTER);
4827 %}
4828
4829 operand immI_8()
4830 %{
4831 predicate(n->get_int() == 8);
4832 match(ConI);
4833
4834 op_cost(0);
4835 format %{ %}
4836 interface(CONST_INTER);
4837 %}
4838
4839 // Valid scale values for addressing modes
4840 operand immI2()
4841 %{
4842 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4843 match(ConI);
4844
4845 format %{ %}
4846 interface(CONST_INTER);
4847 %}
4848
4849 operand immU7()
4850 %{
4851 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4852 match(ConI);
4853
4854 op_cost(5);
4855 format %{ %}
4856 interface(CONST_INTER);
4857 %}
4858
4859 operand immI8()
4860 %{
4861 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4862 match(ConI);
4863
4864 op_cost(5);
4865 format %{ %}
4866 interface(CONST_INTER);
4867 %}
4868
4869 operand immU8()
4870 %{
4871 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4872 match(ConI);
4873
4874 op_cost(5);
4875 format %{ %}
4876 interface(CONST_INTER);
4877 %}
4878
4879 operand immI16()
4880 %{
4881 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4882 match(ConI);
4883
4884 op_cost(10);
4885 format %{ %}
4886 interface(CONST_INTER);
4887 %}
4888
4889 // Int Immediate non-negative
4890 operand immU31()
4891 %{
4892 predicate(n->get_int() >= 0);
4893 match(ConI);
4894
4895 op_cost(0);
4896 format %{ %}
4897 interface(CONST_INTER);
4898 %}
4899
4900 // Pointer Immediate
4901 operand immP()
4902 %{
4903 match(ConP);
4904
4905 op_cost(10);
4906 format %{ %}
4907 interface(CONST_INTER);
4908 %}
4909
4910 // Null Pointer Immediate
4911 operand immP0()
4912 %{
4913 predicate(n->get_ptr() == 0);
4914 match(ConP);
4915
4916 op_cost(5);
4917 format %{ %}
4918 interface(CONST_INTER);
4919 %}
4920
4921 // Pointer Immediate
4922 operand immN() %{
4923 match(ConN);
4924
4925 op_cost(10);
4926 format %{ %}
4927 interface(CONST_INTER);
4928 %}
4929
4930 operand immNKlass() %{
4931 match(ConNKlass);
4932
4933 op_cost(10);
4934 format %{ %}
4935 interface(CONST_INTER);
4936 %}
4937
4938 // Null Pointer Immediate
4939 operand immN0() %{
4940 predicate(n->get_narrowcon() == 0);
4941 match(ConN);
4942
4943 op_cost(5);
4944 format %{ %}
4945 interface(CONST_INTER);
4946 %}
4947
4948 operand immP31()
4949 %{
4950 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4951 && (n->get_ptr() >> 31) == 0);
4952 match(ConP);
4953
4954 op_cost(5);
4955 format %{ %}
4956 interface(CONST_INTER);
4957 %}
4958
4959
4960 // Long Immediate
4961 operand immL()
4962 %{
4963 match(ConL);
4964
4965 op_cost(20);
4966 format %{ %}
4967 interface(CONST_INTER);
4968 %}
4969
4970 // Long Immediate 8-bit
4971 operand immL8()
4972 %{
4973 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4974 match(ConL);
4975
4976 op_cost(5);
4977 format %{ %}
4978 interface(CONST_INTER);
4979 %}
4980
4981 // Long Immediate 32-bit unsigned
4982 operand immUL32()
4983 %{
4984 predicate(n->get_long() == (unsigned int) (n->get_long()));
4985 match(ConL);
4986
4987 op_cost(10);
4988 format %{ %}
4989 interface(CONST_INTER);
4990 %}
4991
4992 // Long Immediate 32-bit signed
4993 operand immL32()
4994 %{
4995 predicate(n->get_long() == (int) (n->get_long()));
4996 match(ConL);
4997
4998 op_cost(15);
4999 format %{ %}
5000 interface(CONST_INTER);
5001 %}
5002
5003 operand immL_Pow2()
5004 %{
5005 predicate(is_power_of_2((julong)n->get_long()));
5006 match(ConL);
5007
5008 op_cost(15);
5009 format %{ %}
5010 interface(CONST_INTER);
5011 %}
5012
5013 operand immL_NotPow2()
5014 %{
5015 predicate(is_power_of_2((julong)~n->get_long()));
5016 match(ConL);
5017
5018 op_cost(15);
5019 format %{ %}
5020 interface(CONST_INTER);
5021 %}
5022
5023 // Long Immediate zero
5024 operand immL0()
5025 %{
5026 predicate(n->get_long() == 0L);
5027 match(ConL);
5028
5029 op_cost(10);
5030 format %{ %}
5031 interface(CONST_INTER);
5032 %}
5033
5034 // Constant for increment
5035 operand immL1()
5036 %{
5037 predicate(n->get_long() == 1);
5038 match(ConL);
5039
5040 format %{ %}
5041 interface(CONST_INTER);
5042 %}
5043
5044 // Constant for decrement
5045 operand immL_M1()
5046 %{
5047 predicate(n->get_long() == -1);
5048 match(ConL);
5049
5050 format %{ %}
5051 interface(CONST_INTER);
5052 %}
5053
5054 // Long Immediate: low 32-bit mask
5055 operand immL_32bits()
5056 %{
5057 predicate(n->get_long() == 0xFFFFFFFFL);
5058 match(ConL);
5059 op_cost(20);
5060
5061 format %{ %}
5062 interface(CONST_INTER);
5063 %}
5064
5065 // Int Immediate: 2^n-1, positive
5066 operand immI_Pow2M1()
5067 %{
5068 predicate((n->get_int() > 0)
5069 && is_power_of_2((juint)n->get_int() + 1));
5070 match(ConI);
5071
5072 op_cost(20);
5073 format %{ %}
5074 interface(CONST_INTER);
5075 %}
5076
5077 // Float Immediate zero
5078 operand immF0()
5079 %{
5080 predicate(jint_cast(n->getf()) == 0);
5081 match(ConF);
5082
5083 op_cost(5);
5084 format %{ %}
5085 interface(CONST_INTER);
5086 %}
5087
5088 // Float Immediate
5089 operand immF()
5090 %{
5091 match(ConF);
5092
5093 op_cost(15);
5094 format %{ %}
5095 interface(CONST_INTER);
5096 %}
5097
5098 // Half Float Immediate
5099 operand immH()
5100 %{
5101 match(ConH);
5102
5103 op_cost(15);
5104 format %{ %}
5105 interface(CONST_INTER);
5106 %}
5107
5108 // Double Immediate zero
5109 operand immD0()
5110 %{
5111 predicate(jlong_cast(n->getd()) == 0);
5112 match(ConD);
5113
5114 op_cost(5);
5115 format %{ %}
5116 interface(CONST_INTER);
5117 %}
5118
5119 // Double Immediate
5120 operand immD()
5121 %{
5122 match(ConD);
5123
5124 op_cost(15);
5125 format %{ %}
5126 interface(CONST_INTER);
5127 %}
5128
5129 // Immediates for special shifts (sign extend)
5130
// Constants for sign-extending shifts
5132 operand immI_16()
5133 %{
5134 predicate(n->get_int() == 16);
5135 match(ConI);
5136
5137 format %{ %}
5138 interface(CONST_INTER);
5139 %}
5140
5141 operand immI_24()
5142 %{
5143 predicate(n->get_int() == 24);
5144 match(ConI);
5145
5146 format %{ %}
5147 interface(CONST_INTER);
5148 %}
5149
5150 // Constant for byte-wide masking
5151 operand immI_255()
5152 %{
5153 predicate(n->get_int() == 255);
5154 match(ConI);
5155
5156 format %{ %}
5157 interface(CONST_INTER);
5158 %}
5159
5160 // Constant for short-wide masking
5161 operand immI_65535()
5162 %{
5163 predicate(n->get_int() == 65535);
5164 match(ConI);
5165
5166 format %{ %}
5167 interface(CONST_INTER);
5168 %}
5169
5170 // Constant for byte-wide masking
5171 operand immL_255()
5172 %{
5173 predicate(n->get_long() == 255);
5174 match(ConL);
5175
5176 format %{ %}
5177 interface(CONST_INTER);
5178 %}
5179
5180 // Constant for short-wide masking
5181 operand immL_65535()
5182 %{
5183 predicate(n->get_long() == 65535);
5184 match(ConL);
5185
5186 format %{ %}
5187 interface(CONST_INTER);
5188 %}
5189
5190 // AOT Runtime Constants Address
5191 operand immAOTRuntimeConstantsAddress()
5192 %{
5193 // Check if the address is in the range of AOT Runtime Constants
5194 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5195 match(ConP);
5196
5197 op_cost(0);
5198 format %{ %}
5199 interface(CONST_INTER);
5200 %}
5201
5202 operand kReg()
5203 %{
5204 constraint(ALLOC_IN_RC(vectmask_reg));
5205 match(RegVectMask);
  format %{ %}
5207 interface(REG_INTER);
5208 %}
5209
5210 // Register Operands
5211 // Integer Register
5212 operand rRegI()
5213 %{
5214 constraint(ALLOC_IN_RC(int_reg));
5215 match(RegI);
5216
5217 match(rax_RegI);
5218 match(rbx_RegI);
5219 match(rcx_RegI);
5220 match(rdx_RegI);
5221 match(rdi_RegI);
5222
5223 format %{ %}
5224 interface(REG_INTER);
5225 %}
5226
5227 // Special Registers
5228 operand rax_RegI()
5229 %{
5230 constraint(ALLOC_IN_RC(int_rax_reg));
5231 match(RegI);
5232 match(rRegI);
5233
5234 format %{ "RAX" %}
5235 interface(REG_INTER);
5236 %}
5237
5238 // Special Registers
5239 operand rbx_RegI()
5240 %{
5241 constraint(ALLOC_IN_RC(int_rbx_reg));
5242 match(RegI);
5243 match(rRegI);
5244
5245 format %{ "RBX" %}
5246 interface(REG_INTER);
5247 %}
5248
5249 operand rcx_RegI()
5250 %{
5251 constraint(ALLOC_IN_RC(int_rcx_reg));
5252 match(RegI);
5253 match(rRegI);
5254
5255 format %{ "RCX" %}
5256 interface(REG_INTER);
5257 %}
5258
5259 operand rdx_RegI()
5260 %{
5261 constraint(ALLOC_IN_RC(int_rdx_reg));
5262 match(RegI);
5263 match(rRegI);
5264
5265 format %{ "RDX" %}
5266 interface(REG_INTER);
5267 %}
5268
5269 operand rdi_RegI()
5270 %{
5271 constraint(ALLOC_IN_RC(int_rdi_reg));
5272 match(RegI);
5273 match(rRegI);
5274
5275 format %{ "RDI" %}
5276 interface(REG_INTER);
5277 %}
5278
5279 operand no_rax_rdx_RegI()
5280 %{
5281 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5282 match(RegI);
5283 match(rbx_RegI);
5284 match(rcx_RegI);
5285 match(rdi_RegI);
5286
5287 format %{ %}
5288 interface(REG_INTER);
5289 %}
5290
5291 operand no_rbp_r13_RegI()
5292 %{
5293 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5294 match(RegI);
5295 match(rRegI);
5296 match(rax_RegI);
5297 match(rbx_RegI);
5298 match(rcx_RegI);
5299 match(rdx_RegI);
5300 match(rdi_RegI);
5301
5302 format %{ %}
5303 interface(REG_INTER);
5304 %}
5305
5306 // Pointer Register
5307 operand any_RegP()
5308 %{
5309 constraint(ALLOC_IN_RC(any_reg));
5310 match(RegP);
5311 match(rax_RegP);
5312 match(rbx_RegP);
5313 match(rdi_RegP);
5314 match(rsi_RegP);
5315 match(rbp_RegP);
5316 match(r15_RegP);
5317 match(rRegP);
5318
5319 format %{ %}
5320 interface(REG_INTER);
5321 %}
5322
5323 operand rRegP()
5324 %{
5325 constraint(ALLOC_IN_RC(ptr_reg));
5326 match(RegP);
5327 match(rax_RegP);
5328 match(rbx_RegP);
5329 match(rdi_RegP);
5330 match(rsi_RegP);
5331 match(rbp_RegP); // See Q&A below about
5332 match(r15_RegP); // r15_RegP and rbp_RegP.
5333
5334 format %{ %}
5335 interface(REG_INTER);
5336 %}
5337
5338 operand rRegN() %{
5339 constraint(ALLOC_IN_RC(int_reg));
5340 match(RegN);
5341
5342 format %{ %}
5343 interface(REG_INTER);
5344 %}
5345
// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules. Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be chosen as
// an output by the allocator.
// The same logic applies to rbp_RegP being a match for rRegP: if
// PreserveFramePointer is true, RBP is used as a proper frame pointer and is
// not included in ptr_reg. As a result, RBP is not included in the
// instruction's output either.
5356
5357 // This operand is not allowed to use RBP even if
5358 // RBP is not used to hold the frame pointer.
5359 operand no_rbp_RegP()
5360 %{
5361 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5362 match(RegP);
5363 match(rbx_RegP);
5364 match(rsi_RegP);
5365 match(rdi_RegP);
5366
5367 format %{ %}
5368 interface(REG_INTER);
5369 %}
5370
5371 // Special Registers
5372 // Return a pointer value
5373 operand rax_RegP()
5374 %{
5375 constraint(ALLOC_IN_RC(ptr_rax_reg));
5376 match(RegP);
5377 match(rRegP);
5378
5379 format %{ %}
5380 interface(REG_INTER);
5381 %}
5382
5383 // Special Registers
5384 // Return a compressed pointer value
5385 operand rax_RegN()
5386 %{
5387 constraint(ALLOC_IN_RC(int_rax_reg));
5388 match(RegN);
5389 match(rRegN);
5390
5391 format %{ %}
5392 interface(REG_INTER);
5393 %}
5394
5395 // Used in AtomicAdd
5396 operand rbx_RegP()
5397 %{
5398 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5399 match(RegP);
5400 match(rRegP);
5401
5402 format %{ %}
5403 interface(REG_INTER);
5404 %}
5405
5406 operand rsi_RegP()
5407 %{
5408 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5409 match(RegP);
5410 match(rRegP);
5411
5412 format %{ %}
5413 interface(REG_INTER);
5414 %}
5415
5416 operand rbp_RegP()
5417 %{
5418 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5419 match(RegP);
5420 match(rRegP);
5421
5422 format %{ %}
5423 interface(REG_INTER);
5424 %}
5425
5426 // Used in rep stosq
5427 operand rdi_RegP()
5428 %{
5429 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5430 match(RegP);
5431 match(rRegP);
5432
5433 format %{ %}
5434 interface(REG_INTER);
5435 %}
5436
5437 operand r15_RegP()
5438 %{
5439 constraint(ALLOC_IN_RC(ptr_r15_reg));
5440 match(RegP);
5441 match(rRegP);
5442
5443 format %{ %}
5444 interface(REG_INTER);
5445 %}
5446
5447 operand rRegL()
5448 %{
5449 constraint(ALLOC_IN_RC(long_reg));
5450 match(RegL);
5451 match(rax_RegL);
5452 match(rdx_RegL);
5453
5454 format %{ %}
5455 interface(REG_INTER);
5456 %}
5457
5458 // Special Registers
5459 operand no_rax_rdx_RegL()
5460 %{
5461 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5462 match(RegL);
5463 match(rRegL);
5464
5465 format %{ %}
5466 interface(REG_INTER);
5467 %}
5468
5469 operand rax_RegL()
5470 %{
5471 constraint(ALLOC_IN_RC(long_rax_reg));
5472 match(RegL);
5473 match(rRegL);
5474
5475 format %{ "RAX" %}
5476 interface(REG_INTER);
5477 %}
5478
5479 operand rcx_RegL()
5480 %{
5481 constraint(ALLOC_IN_RC(long_rcx_reg));
5482 match(RegL);
5483 match(rRegL);
5484
5485 format %{ %}
5486 interface(REG_INTER);
5487 %}
5488
5489 operand rdx_RegL()
5490 %{
5491 constraint(ALLOC_IN_RC(long_rdx_reg));
5492 match(RegL);
5493 match(rRegL);
5494
5495 format %{ %}
5496 interface(REG_INTER);
5497 %}
5498
5499 operand r11_RegL()
5500 %{
5501 constraint(ALLOC_IN_RC(long_r11_reg));
5502 match(RegL);
5503 match(rRegL);
5504
5505 format %{ %}
5506 interface(REG_INTER);
5507 %}
5508
5509 operand no_rbp_r13_RegL()
5510 %{
5511 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5512 match(RegL);
5513 match(rRegL);
5514 match(rax_RegL);
5515 match(rcx_RegL);
5516 match(rdx_RegL);
5517
5518 format %{ %}
5519 interface(REG_INTER);
5520 %}
5521
5522 // Flags register, used as output of compare instructions
5523 operand rFlagsReg()
5524 %{
5525 constraint(ALLOC_IN_RC(int_flags));
5526 match(RegFlags);
5527
5528 format %{ "RFLAGS" %}
5529 interface(REG_INTER);
5530 %}
5531
5532 // Flags register, used as output of FLOATING POINT compare instructions
5533 operand rFlagsRegU()
5534 %{
5535 constraint(ALLOC_IN_RC(int_flags));
5536 match(RegFlags);
5537
5538 format %{ "RFLAGS_U" %}
5539 interface(REG_INTER);
5540 %}
5541
5542 operand rFlagsRegUCF() %{
5543 constraint(ALLOC_IN_RC(int_flags));
5544 match(RegFlags);
5545 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5546
5547 format %{ "RFLAGS_U_CF" %}
5548 interface(REG_INTER);
5549 %}
5550
5551 operand rFlagsRegUCFE() %{
5552 constraint(ALLOC_IN_RC(int_flags));
5553 match(RegFlags);
5554 predicate(UseAPX && VM_Version::supports_avx10_2());
5555
5556 format %{ "RFLAGS_U_CFE" %}
5557 interface(REG_INTER);
5558 %}
5559
5560 // Float register operands
5561 operand regF() %{
5562 constraint(ALLOC_IN_RC(float_reg));
5563 match(RegF);
5564
5565 format %{ %}
5566 interface(REG_INTER);
5567 %}
5568
5569 // Float register operands
5570 operand legRegF() %{
5571 constraint(ALLOC_IN_RC(float_reg_legacy));
5572 match(RegF);
5573
5574 format %{ %}
5575 interface(REG_INTER);
5576 %}
5577
5578 // Float register operands
5579 operand vlRegF() %{
5580 constraint(ALLOC_IN_RC(float_reg_vl));
5581 match(RegF);
5582
5583 format %{ %}
5584 interface(REG_INTER);
5585 %}
5586
5587 // Double register operands
5588 operand regD() %{
5589 constraint(ALLOC_IN_RC(double_reg));
5590 match(RegD);
5591
5592 format %{ %}
5593 interface(REG_INTER);
5594 %}
5595
5596 // Double register operands
5597 operand legRegD() %{
5598 constraint(ALLOC_IN_RC(double_reg_legacy));
5599 match(RegD);
5600
5601 format %{ %}
5602 interface(REG_INTER);
5603 %}
5604
5605 // Double register operands
5606 operand vlRegD() %{
5607 constraint(ALLOC_IN_RC(double_reg_vl));
5608 match(RegD);
5609
5610 format %{ %}
5611 interface(REG_INTER);
5612 %}
5613
5614 //----------Memory Operands----------------------------------------------------
5615 // Direct Memory Operand
5616 // operand direct(immP addr)
5617 // %{
5618 // match(addr);
5619
5620 // format %{ "[$addr]" %}
5621 // interface(MEMORY_INTER) %{
5622 // base(0xFFFFFFFF);
5623 // index(0x4);
5624 // scale(0x0);
5625 // disp($addr);
5626 // %}
5627 // %}
5628
5629 // Indirect Memory Operand
5630 operand indirect(any_RegP reg)
5631 %{
5632 constraint(ALLOC_IN_RC(ptr_reg));
5633 match(reg);
5634
5635 format %{ "[$reg]" %}
5636 interface(MEMORY_INTER) %{
5637 base($reg);
5638 index(0x4);
5639 scale(0x0);
5640 disp(0x0);
5641 %}
5642 %}
5643
5644 // Indirect Memory Plus Short Offset Operand
5645 operand indOffset8(any_RegP reg, immL8 off)
5646 %{
5647 constraint(ALLOC_IN_RC(ptr_reg));
5648 match(AddP reg off);
5649
5650 format %{ "[$reg + $off (8-bit)]" %}
5651 interface(MEMORY_INTER) %{
5652 base($reg);
5653 index(0x4);
5654 scale(0x0);
5655 disp($off);
5656 %}
5657 %}
5658
5659 // Indirect Memory Plus Long Offset Operand
5660 operand indOffset32(any_RegP reg, immL32 off)
5661 %{
5662 constraint(ALLOC_IN_RC(ptr_reg));
5663 match(AddP reg off);
5664
5665 format %{ "[$reg + $off (32-bit)]" %}
5666 interface(MEMORY_INTER) %{
5667 base($reg);
5668 index(0x4);
5669 scale(0x0);
5670 disp($off);
5671 %}
5672 %}
5673
5674 // Indirect Memory Plus Index Register Plus Offset Operand
5675 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5676 %{
5677 constraint(ALLOC_IN_RC(ptr_reg));
5678 match(AddP (AddP reg lreg) off);
5679
5680 op_cost(10);
5681 format %{"[$reg + $off + $lreg]" %}
5682 interface(MEMORY_INTER) %{
5683 base($reg);
5684 index($lreg);
5685 scale(0x0);
5686 disp($off);
5687 %}
5688 %}
5689
5690 // Indirect Memory Plus Index Register Plus Offset Operand
5691 operand indIndex(any_RegP reg, rRegL lreg)
5692 %{
5693 constraint(ALLOC_IN_RC(ptr_reg));
5694 match(AddP reg lreg);
5695
5696 op_cost(10);
5697 format %{"[$reg + $lreg]" %}
5698 interface(MEMORY_INTER) %{
5699 base($reg);
5700 index($lreg);
5701 scale(0x0);
5702 disp(0x0);
5703 %}
5704 %}
5705
5706 // Indirect Memory Times Scale Plus Index Register
5707 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5708 %{
5709 constraint(ALLOC_IN_RC(ptr_reg));
5710 match(AddP reg (LShiftL lreg scale));
5711
5712 op_cost(10);
5713 format %{"[$reg + $lreg << $scale]" %}
5714 interface(MEMORY_INTER) %{
5715 base($reg);
5716 index($lreg);
5717 scale($scale);
5718 disp(0x0);
5719 %}
5720 %}
5721
5722 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5723 %{
5724 constraint(ALLOC_IN_RC(ptr_reg));
5725 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5726 match(AddP reg (LShiftL (ConvI2L idx) scale));
5727
5728 op_cost(10);
5729 format %{"[$reg + pos $idx << $scale]" %}
5730 interface(MEMORY_INTER) %{
5731 base($reg);
5732 index($idx);
5733 scale($scale);
5734 disp(0x0);
5735 %}
5736 %}
5737
5738 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5739 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5740 %{
5741 constraint(ALLOC_IN_RC(ptr_reg));
5742 match(AddP (AddP reg (LShiftL lreg scale)) off);
5743
5744 op_cost(10);
5745 format %{"[$reg + $off + $lreg << $scale]" %}
5746 interface(MEMORY_INTER) %{
5747 base($reg);
5748 index($lreg);
5749 scale($scale);
5750 disp($off);
5751 %}
5752 %}
5753
5754 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5755 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5756 %{
5757 constraint(ALLOC_IN_RC(ptr_reg));
5758 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5759 match(AddP (AddP reg (ConvI2L idx)) off);
5760
5761 op_cost(10);
5762 format %{"[$reg + $off + $idx]" %}
5763 interface(MEMORY_INTER) %{
5764 base($reg);
5765 index($idx);
5766 scale(0x0);
5767 disp($off);
5768 %}
5769 %}
5770
5771 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5772 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5773 %{
5774 constraint(ALLOC_IN_RC(ptr_reg));
5775 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5776 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5777
5778 op_cost(10);
5779 format %{"[$reg + $off + $idx << $scale]" %}
5780 interface(MEMORY_INTER) %{
5781 base($reg);
5782 index($idx);
5783 scale($scale);
5784 disp($off);
5785 %}
5786 %}
5787
5788 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
5791 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5792 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5793 constraint(ALLOC_IN_RC(ptr_reg));
5794 match(AddP (DecodeN reg) off);
5795
5796 op_cost(10);
5797 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5798 interface(MEMORY_INTER) %{
5799 base(0xc); // R12
5800 index($reg);
5801 scale(0x3);
5802 disp($off);
5803 %}
5804 %}
5805
5806 // Indirect Memory Operand
5807 operand indirectNarrow(rRegN reg)
5808 %{
5809 predicate(CompressedOops::shift() == 0);
5810 constraint(ALLOC_IN_RC(ptr_reg));
5811 match(DecodeN reg);
5812
5813 format %{ "[$reg]" %}
5814 interface(MEMORY_INTER) %{
5815 base($reg);
5816 index(0x4);
5817 scale(0x0);
5818 disp(0x0);
5819 %}
5820 %}
5821
5822 // Indirect Memory Plus Short Offset Operand
5823 operand indOffset8Narrow(rRegN reg, immL8 off)
5824 %{
5825 predicate(CompressedOops::shift() == 0);
5826 constraint(ALLOC_IN_RC(ptr_reg));
5827 match(AddP (DecodeN reg) off);
5828
5829 format %{ "[$reg + $off (8-bit)]" %}
5830 interface(MEMORY_INTER) %{
5831 base($reg);
5832 index(0x4);
5833 scale(0x0);
5834 disp($off);
5835 %}
5836 %}
5837
5838 // Indirect Memory Plus Long Offset Operand
5839 operand indOffset32Narrow(rRegN reg, immL32 off)
5840 %{
5841 predicate(CompressedOops::shift() == 0);
5842 constraint(ALLOC_IN_RC(ptr_reg));
5843 match(AddP (DecodeN reg) off);
5844
5845 format %{ "[$reg + $off (32-bit)]" %}
5846 interface(MEMORY_INTER) %{
5847 base($reg);
5848 index(0x4);
5849 scale(0x0);
5850 disp($off);
5851 %}
5852 %}
5853
5854 // Indirect Memory Plus Index Register Plus Offset Operand
5855 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5856 %{
5857 predicate(CompressedOops::shift() == 0);
5858 constraint(ALLOC_IN_RC(ptr_reg));
5859 match(AddP (AddP (DecodeN reg) lreg) off);
5860
5861 op_cost(10);
5862 format %{"[$reg + $off + $lreg]" %}
5863 interface(MEMORY_INTER) %{
5864 base($reg);
5865 index($lreg);
5866 scale(0x0);
5867 disp($off);
5868 %}
5869 %}
5870
5871 // Indirect Memory Plus Index Register Plus Offset Operand
5872 operand indIndexNarrow(rRegN reg, rRegL lreg)
5873 %{
5874 predicate(CompressedOops::shift() == 0);
5875 constraint(ALLOC_IN_RC(ptr_reg));
5876 match(AddP (DecodeN reg) lreg);
5877
5878 op_cost(10);
5879 format %{"[$reg + $lreg]" %}
5880 interface(MEMORY_INTER) %{
5881 base($reg);
5882 index($lreg);
5883 scale(0x0);
5884 disp(0x0);
5885 %}
5886 %}
5887
5888 // Indirect Memory Times Scale Plus Index Register
5889 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5890 %{
5891 predicate(CompressedOops::shift() == 0);
5892 constraint(ALLOC_IN_RC(ptr_reg));
5893 match(AddP (DecodeN reg) (LShiftL lreg scale));
5894
5895 op_cost(10);
5896 format %{"[$reg + $lreg << $scale]" %}
5897 interface(MEMORY_INTER) %{
5898 base($reg);
5899 index($lreg);
5900 scale($scale);
5901 disp(0x0);
5902 %}
5903 %}
5904
5905 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5906 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5907 %{
5908 predicate(CompressedOops::shift() == 0);
5909 constraint(ALLOC_IN_RC(ptr_reg));
5910 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5911
5912 op_cost(10);
5913 format %{"[$reg + $off + $lreg << $scale]" %}
5914 interface(MEMORY_INTER) %{
5915 base($reg);
5916 index($lreg);
5917 scale($scale);
5918 disp($off);
5919 %}
5920 %}
5921
// Indirect Memory Plus Positive Index Register Plus Offset Operand
5923 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5924 %{
5925 constraint(ALLOC_IN_RC(ptr_reg));
5926 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5927 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5928
5929 op_cost(10);
5930 format %{"[$reg + $off + $idx]" %}
5931 interface(MEMORY_INTER) %{
5932 base($reg);
5933 index($idx);
5934 scale(0x0);
5935 disp($off);
5936 %}
5937 %}
5938
5939 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5940 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5941 %{
5942 constraint(ALLOC_IN_RC(ptr_reg));
5943 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5944 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5945
5946 op_cost(10);
5947 format %{"[$reg + $off + $idx << $scale]" %}
5948 interface(MEMORY_INTER) %{
5949 base($reg);
5950 index($idx);
5951 scale($scale);
5952 disp($off);
5953 %}
5954 %}
5955
5956 //----------Special Memory Operands--------------------------------------------
5957 // Stack Slot Operand - This operand is used for loading and storing temporary
5958 // values on the stack where a match requires a value to
5959 // flow through memory.
5960 operand stackSlotP(sRegP reg)
5961 %{
5962 constraint(ALLOC_IN_RC(stack_slots));
5963 // No match rule because this operand is only generated in matching
5964
5965 format %{ "[$reg]" %}
5966 interface(MEMORY_INTER) %{
5967 base(0x4); // RSP
5968 index(0x4); // No Index
5969 scale(0x0); // No Scale
5970 disp($reg); // Stack Offset
5971 %}
5972 %}
5973
5974 operand stackSlotI(sRegI reg)
5975 %{
5976 constraint(ALLOC_IN_RC(stack_slots));
5977 // No match rule because this operand is only generated in matching
5978
5979 format %{ "[$reg]" %}
5980 interface(MEMORY_INTER) %{
5981 base(0x4); // RSP
5982 index(0x4); // No Index
5983 scale(0x0); // No Scale
5984 disp($reg); // Stack Offset
5985 %}
5986 %}
5987
5988 operand stackSlotF(sRegF reg)
5989 %{
5990 constraint(ALLOC_IN_RC(stack_slots));
5991 // No match rule because this operand is only generated in matching
5992
5993 format %{ "[$reg]" %}
5994 interface(MEMORY_INTER) %{
5995 base(0x4); // RSP
5996 index(0x4); // No Index
5997 scale(0x0); // No Scale
5998 disp($reg); // Stack Offset
5999 %}
6000 %}
6001
6002 operand stackSlotD(sRegD reg)
6003 %{
6004 constraint(ALLOC_IN_RC(stack_slots));
6005 // No match rule because this operand is only generated in matching
6006
6007 format %{ "[$reg]" %}
6008 interface(MEMORY_INTER) %{
6009 base(0x4); // RSP
6010 index(0x4); // No Index
6011 scale(0x0); // No Scale
6012 disp($reg); // Stack Offset
6013 %}
6014 %}
6015 operand stackSlotL(sRegL reg)
6016 %{
6017 constraint(ALLOC_IN_RC(stack_slots));
6018 // No match rule because this operand is only generated in matching
6019
6020 format %{ "[$reg]" %}
6021 interface(MEMORY_INTER) %{
6022 base(0x4); // RSP
6023 index(0x4); // No Index
6024 scale(0x0); // No Scale
6025 disp($reg); // Stack Offset
6026 %}
6027 %}
6028
6029 //----------Conditional Branch Operands----------------------------------------
6030 // Comparison Op - This is the operation of the comparison, and is limited to
6031 // the following set of codes:
6032 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6033 //
6034 // Other attributes of the comparison, such as unsignedness, are specified
6035 // by the comparison instruction that sets a condition code flags register.
6036 // That result is represented by a flags operand whose subtype is appropriate
6037 // to the unsignedness (etc.) of the comparison.
6038 //
6039 // Later, the instruction which matches both the Comparison Op (a Bool) and
6040 // the flags (produced by the Cmp) specifies the coding of the comparison op
6041 // by matching a specific subtype of Bool operand below, such as cmpOpU.
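//
// For example, a Bool node testing BoolTest::eq that matches cmpOp below
// supplies condition encoding 0x4 ("e"). An ins_encode body can consume it
// directly, e.g. __ jcc((Assembler::Condition)($cop$$cmpcode), done) in a
// conditional-branch rule whose cmpOp operand is named cop (the names here
// are illustrative).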
6042
6043 // Comparison Code
6044 operand cmpOp()
6045 %{
6046 match(Bool);
6047
6048 format %{ "" %}
6049 interface(COND_INTER) %{
6050 equal(0x4, "e");
6051 not_equal(0x5, "ne");
6052 less(0xc, "l");
6053 greater_equal(0xd, "ge");
6054 less_equal(0xe, "le");
6055 greater(0xf, "g");
6056 overflow(0x0, "o");
6057 no_overflow(0x1, "no");
6058 %}
6059 %}
6060
6061 // Comparison Code, unsigned compare. Used by FP also, with
6062 // C2 (unordered) turned into GT or LT already. The other bits
6063 // C0 and C3 are turned into Carry & Zero flags.
6064 operand cmpOpU()
6065 %{
6066 match(Bool);
6067
6068 format %{ "" %}
6069 interface(COND_INTER) %{
6070 equal(0x4, "e");
6071 not_equal(0x5, "ne");
6072 less(0x2, "b");
6073 greater_equal(0x3, "ae");
6074 less_equal(0x6, "be");
6075 greater(0x7, "a");
6076 overflow(0x0, "o");
6077 no_overflow(0x1, "no");
6078 %}
6079 %}
6080
6081
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6085 operand cmpOpUCF() %{
6086 match(Bool);
6087 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6088 (n->as_Bool()->_test._test == BoolTest::lt ||
6089 n->as_Bool()->_test._test == BoolTest::ge ||
6090 n->as_Bool()->_test._test == BoolTest::le ||
6091 n->as_Bool()->_test._test == BoolTest::gt ||
6092 n->in(1)->in(1) == n->in(1)->in(2)));
6093 format %{ "" %}
6094 interface(COND_INTER) %{
6095 equal(0xb, "np");
6096 not_equal(0xa, "p");
6097 less(0x2, "b");
6098 greater_equal(0x3, "ae");
6099 less_equal(0x6, "be");
6100 greater(0x7, "a");
6101 overflow(0x0, "o");
6102 no_overflow(0x1, "no");
6103 %}
6104 %}
6105
6106
6107 // Floating comparisons that can be fixed up with extra conditional jumps
6108 operand cmpOpUCF2() %{
6109 match(Bool);
6110 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6111 (n->as_Bool()->_test._test == BoolTest::ne ||
6112 n->as_Bool()->_test._test == BoolTest::eq) &&
6113 n->in(1)->in(1) != n->in(1)->in(2));
6114 format %{ "" %}
6115 interface(COND_INTER) %{
6116 equal(0x4, "e");
6117 not_equal(0x5, "ne");
6118 less(0x2, "b");
6119 greater_equal(0x3, "ae");
6120 less_equal(0x6, "be");
6121 greater(0x7, "a");
6122 overflow(0x0, "o");
6123 no_overflow(0x1, "no");
6124 %}
6125 %}
6126
6127
// Floating point comparisons that set condition flags to test more directly.
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing them into
// an instruction, because the preceding comparison might be based on a
// three-way comparison (CmpF3 or CmpD3) that also assigns unordered outcomes
// to -1.
6134 operand cmpOpUCFE()
6135 %{
6136 match(Bool);
6137 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6138 (n->as_Bool()->_test._test == BoolTest::ne ||
6139 n->as_Bool()->_test._test == BoolTest::eq ||
6140 n->as_Bool()->_test._test == BoolTest::lt ||
6141 n->as_Bool()->_test._test == BoolTest::ge ||
6142 n->as_Bool()->_test._test == BoolTest::le ||
6143 n->as_Bool()->_test._test == BoolTest::gt));
6144
6145 format %{ "" %}
6146 interface(COND_INTER) %{
6147 equal(0x4, "e");
6148 not_equal(0x5, "ne");
6149 less(0x2, "b");
6150 greater_equal(0x3, "ae");
6151 less_equal(0x6, "be");
6152 greater(0x7, "a");
6153 overflow(0x0, "o");
6154 no_overflow(0x1, "no");
6155 %}
6156 %}
6157
6158 // Operands for bound floating pointer register arguments
6159 operand rxmm0() %{
6160 constraint(ALLOC_IN_RC(xmm0_reg));
6161 match(VecX);
  format %{ %}
6163 interface(REG_INTER);
6164 %}
6165
6166 // Vectors
6167
6168 // Dummy generic vector class. Should be used for all vector operands.
6169 // Replaced with vec[SDXYZ] during post-selection pass.
6170 operand vec() %{
6171 constraint(ALLOC_IN_RC(dynamic));
6172 match(VecX);
6173 match(VecY);
6174 match(VecZ);
6175 match(VecS);
6176 match(VecD);
6177
6178 format %{ %}
6179 interface(REG_INTER);
6180 %}
6181
6182 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6183 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6184 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6185 // runtime code generation via reg_class_dynamic.
6186 operand legVec() %{
6187 constraint(ALLOC_IN_RC(dynamic));
6188 match(VecX);
6189 match(VecY);
6190 match(VecZ);
6191 match(VecS);
6192 match(VecD);
6193
6194 format %{ %}
6195 interface(REG_INTER);
6196 %}
6197
6198 // Replaces vec during post-selection cleanup. See above.
6199 operand vecS() %{
6200 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6201 match(VecS);
6202
6203 format %{ %}
6204 interface(REG_INTER);
6205 %}
6206
6207 // Replaces legVec during post-selection cleanup. See above.
6208 operand legVecS() %{
6209 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6210 match(VecS);
6211
6212 format %{ %}
6213 interface(REG_INTER);
6214 %}
6215
6216 // Replaces vec during post-selection cleanup. See above.
6217 operand vecD() %{
6218 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6219 match(VecD);
6220
6221 format %{ %}
6222 interface(REG_INTER);
6223 %}
6224
6225 // Replaces legVec during post-selection cleanup. See above.
6226 operand legVecD() %{
6227 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6228 match(VecD);
6229
6230 format %{ %}
6231 interface(REG_INTER);
6232 %}
6233
6234 // Replaces vec during post-selection cleanup. See above.
6235 operand vecX() %{
6236 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6237 match(VecX);
6238
6239 format %{ %}
6240 interface(REG_INTER);
6241 %}
6242
6243 // Replaces legVec during post-selection cleanup. See above.
6244 operand legVecX() %{
6245 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6246 match(VecX);
6247
6248 format %{ %}
6249 interface(REG_INTER);
6250 %}
6251
6252 // Replaces vec during post-selection cleanup. See above.
6253 operand vecY() %{
6254 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6255 match(VecY);
6256
6257 format %{ %}
6258 interface(REG_INTER);
6259 %}
6260
6261 // Replaces legVec during post-selection cleanup. See above.
6262 operand legVecY() %{
6263 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6264 match(VecY);
6265
6266 format %{ %}
6267 interface(REG_INTER);
6268 %}
6269
6270 // Replaces vec during post-selection cleanup. See above.
6271 operand vecZ() %{
6272 constraint(ALLOC_IN_RC(vectorz_reg));
6273 match(VecZ);
6274
6275 format %{ %}
6276 interface(REG_INTER);
6277 %}
6278
6279 // Replaces legVec during post-selection cleanup. See above.
6280 operand legVecZ() %{
6281 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6282 match(VecZ);
6283
6284 format %{ %}
6285 interface(REG_INTER);
6286 %}
6287
6288 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6290 // instruction definitions by not requiring the AD writer to specify separate
6291 // instructions for every form of operand when the instruction accepts
6292 // multiple operand types with the same basic encoding and format. The classic
6293 // case of this is memory operands.
6294
6295 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6296 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6297 indCompressedOopOffset,
6298 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6299 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6300 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
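
// For example, a single load rule can accept every addressing form above
// through one memory operand (an illustrative sketch in the style of the
// load rules defined later in this file):
//
//   instruct loadI_example(rRegI dst, memory mem)
//   %{
//     match(Set dst (LoadI mem));
//     format %{ "movl    $dst, $mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}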
6301
6302 //----------PIPELINE-----------------------------------------------------------
6303 // Rules which define the behavior of the target architectures pipeline.
6304 pipeline %{
6305
6306 //----------ATTRIBUTES---------------------------------------------------------
6307 attributes %{
    variable_size_instructions;        // Variable-size instructions
    max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
    instruction_unit_size = 1;         // An instruction is 1 byte long
    instruction_fetch_unit_size = 16;  // The processor fetches one line
    instruction_fetch_units = 1;       // of 16 bytes
6313 %}
6314
6315 //----------RESOURCES----------------------------------------------------------
6316 // Resources are the functional units available to the machine
6317
6318 // Generic P2/P3 pipeline
6319 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6320 // 3 instructions decoded per cycle.
6321 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6323 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6324 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6325 BR, FPU,
6326 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6327
6328 //----------PIPELINE DESCRIPTION-----------------------------------------------
6329 // Pipeline Description specifies the stages in the machine's pipeline
6330
6331 // Generic P2/P3 pipeline
6332 pipe_desc(S0, S1, S2, S3, S4, S5);
6333
6334 //----------PIPELINE CLASSES---------------------------------------------------
6335 // Pipeline Classes describe the stages in which input and output are
6336 // referenced by the hardware pipeline.
6337
6338 // Naming convention: ialu or fpu
6339 // Then: _reg
6340 // Then: _reg if there is a 2nd register
6341 // Then: _long if it's a pair of instructions implementing a long
6342 // Then: _fat if it requires the big decoder
6343 // Or: _mem if it requires the big decoder and a memory unit.
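//
// For example, ialu_reg_mem below names an integer ALU operation with a
// register destination and a memory source, so it requires the big decoder
// (D0) and a memory unit in addition to an ALU.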
6344
6345 // Integer ALU reg operation
6346 pipe_class ialu_reg(rRegI dst)
6347 %{
6348 single_instruction;
6349 dst : S4(write);
6350 dst : S3(read);
6351 DECODE : S0; // any decoder
6352 ALU : S3; // any alu
6353 %}
6354
6355 // Long ALU reg operation
6356 pipe_class ialu_reg_long(rRegL dst)
6357 %{
6358 instruction_count(2);
6359 dst : S4(write);
6360 dst : S3(read);
6361 DECODE : S0(2); // any 2 decoders
6362 ALU : S3(2); // both alus
6363 %}
6364
6365 // Integer ALU reg operation using big decoder
6366 pipe_class ialu_reg_fat(rRegI dst)
6367 %{
6368 single_instruction;
6369 dst : S4(write);
6370 dst : S3(read);
6371 D0 : S0; // big decoder only
6372 ALU : S3; // any alu
6373 %}
6374
6375 // Integer ALU reg-reg operation
6376 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6377 %{
6378 single_instruction;
6379 dst : S4(write);
6380 src : S3(read);
6381 DECODE : S0; // any decoder
6382 ALU : S3; // any alu
6383 %}
6384
6385 // Integer ALU reg-reg operation
6386 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6387 %{
6388 single_instruction;
6389 dst : S4(write);
6390 src : S3(read);
6391 D0 : S0; // big decoder only
6392 ALU : S3; // any alu
6393 %}
6394
6395 // Integer ALU reg-mem operation
6396 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6397 %{
6398 single_instruction;
6399 dst : S5(write);
6400 mem : S3(read);
6401 D0 : S0; // big decoder only
6402 ALU : S4; // any alu
6403 MEM : S3; // any mem
6404 %}
6405
6406 // Integer mem operation (prefetch)
6407 pipe_class ialu_mem(memory mem)
6408 %{
6409 single_instruction;
6410 mem : S3(read);
6411 D0 : S0; // big decoder only
6412 MEM : S3; // any mem
6413 %}
6414
6415 // Integer Store to Memory
6416 pipe_class ialu_mem_reg(memory mem, rRegI src)
6417 %{
6418 single_instruction;
6419 mem : S3(read);
6420 src : S5(read);
6421 D0 : S0; // big decoder only
6422 ALU : S4; // any alu
6423 MEM : S3;
6424 %}
6425
6426 // // Long Store to Memory
6427 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6428 // %{
6429 // instruction_count(2);
6430 // mem : S3(read);
6431 // src : S5(read);
6432 // D0 : S0(2); // big decoder only; twice
6433 // ALU : S4(2); // any 2 alus
6434 // MEM : S3(2); // Both mems
6435 // %}
6436
6437 // Integer Store to Memory
6438 pipe_class ialu_mem_imm(memory mem)
6439 %{
6440 single_instruction;
6441 mem : S3(read);
6442 D0 : S0; // big decoder only
6443 ALU : S4; // any alu
6444 MEM : S3;
6445 %}
6446
6447 // Integer ALU0 reg-reg operation
6448 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6449 %{
6450 single_instruction;
6451 dst : S4(write);
6452 src : S3(read);
6453 D0 : S0; // Big decoder only
6454 ALU0 : S3; // only alu0
6455 %}
6456
6457 // Integer ALU0 reg-mem operation
6458 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6459 %{
6460 single_instruction;
6461 dst : S5(write);
6462 mem : S3(read);
6463 D0 : S0; // big decoder only
6464 ALU0 : S4; // ALU0 only
6465 MEM : S3; // any mem
6466 %}
6467
6468 // Integer ALU reg-reg operation
6469 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6470 %{
6471 single_instruction;
6472 cr : S4(write);
6473 src1 : S3(read);
6474 src2 : S3(read);
6475 DECODE : S0; // any decoder
6476 ALU : S3; // any alu
6477 %}
6478
6479 // Integer ALU reg-imm operation
6480 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6481 %{
6482 single_instruction;
6483 cr : S4(write);
6484 src1 : S3(read);
6485 DECODE : S0; // any decoder
6486 ALU : S3; // any alu
6487 %}
6488
6489 // Integer ALU reg-mem operation
6490 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6491 %{
6492 single_instruction;
6493 cr : S4(write);
6494 src1 : S3(read);
6495 src2 : S3(read);
6496 D0 : S0; // big decoder only
6497 ALU : S4; // any alu
6498 MEM : S3;
6499 %}
6500
6501 // Conditional move reg-reg
6502 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6503 %{
6504 instruction_count(4);
6505 y : S4(read);
6506 q : S3(read);
6507 p : S3(read);
    DECODE : S0(4); // any 4 decoders
6509 %}
6510
6511 // Conditional move reg-reg
6512 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6513 %{
6514 single_instruction;
6515 dst : S4(write);
6516 src : S3(read);
6517 cr : S3(read);
6518 DECODE : S0; // any decoder
6519 %}
6520
6521 // Conditional move reg-mem
6522 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6523 %{
6524 single_instruction;
6525 dst : S4(write);
6526 src : S3(read);
6527 cr : S3(read);
6528 DECODE : S0; // any decoder
6529 MEM : S3;
6530 %}
6531
6532 // Conditional move reg-reg long
6533 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6534 %{
6535 single_instruction;
6536 dst : S4(write);
6537 src : S3(read);
6538 cr : S3(read);
6539 DECODE : S0(2); // any 2 decoders
6540 %}
6541
6542 // Float reg-reg operation
6543 pipe_class fpu_reg(regD dst)
6544 %{
6545 instruction_count(2);
6546 dst : S3(read);
6547 DECODE : S0(2); // any 2 decoders
6548 FPU : S3;
6549 %}
6550
6551 // Float reg-reg operation
6552 pipe_class fpu_reg_reg(regD dst, regD src)
6553 %{
6554 instruction_count(2);
6555 dst : S4(write);
6556 src : S3(read);
6557 DECODE : S0(2); // any 2 decoders
6558 FPU : S3;
6559 %}
6560
6561 // Float reg-reg operation
6562 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6563 %{
6564 instruction_count(3);
6565 dst : S4(write);
6566 src1 : S3(read);
6567 src2 : S3(read);
6568 DECODE : S0(3); // any 3 decoders
6569 FPU : S3(2);
6570 %}
6571
6572 // Float reg-reg operation
6573 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6574 %{
6575 instruction_count(4);
6576 dst : S4(write);
6577 src1 : S3(read);
6578 src2 : S3(read);
6579 src3 : S3(read);
    DECODE : S0(4);    // any 4 decoders
6581 FPU : S3(2);
6582 %}
6583
// Float reg-mem-reg-reg operation
6585 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6586 %{
6587 instruction_count(4);
6588 dst : S4(write);
6589 src1 : S3(read);
6590 src2 : S3(read);
6591 src3 : S3(read);
6592 DECODE : S1(3); // any 3 decoders
6593 D0 : S0; // Big decoder only
6594 FPU : S3(2);
6595 MEM : S3;
6596 %}
6597
6598 // Float reg-mem operation
6599 pipe_class fpu_reg_mem(regD dst, memory mem)
6600 %{
6601 instruction_count(2);
6602 dst : S5(write);
6603 mem : S3(read);
6604 D0 : S0; // big decoder only
6605 DECODE : S1; // any decoder for FPU POP
6606 FPU : S4;
6607 MEM : S3; // any mem
6608 %}
6609
6610 // Float reg-mem operation
6611 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6612 %{
6613 instruction_count(3);
6614 dst : S5(write);
6615 src1 : S3(read);
6616 mem : S3(read);
6617 D0 : S0; // big decoder only
6618 DECODE : S1(2); // any decoder for FPU POP
6619 FPU : S4;
6620 MEM : S3; // any mem
6621 %}
6622
6623 // Float mem-reg operation
6624 pipe_class fpu_mem_reg(memory mem, regD src)
6625 %{
6626 instruction_count(2);
6627 src : S5(read);
6628 mem : S3(read);
6629 DECODE : S0; // any decoder for FPU PUSH
6630 D0 : S1; // big decoder only
6631 FPU : S4;
6632 MEM : S3; // any mem
6633 %}
6634
6635 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6636 %{
6637 instruction_count(3);
6638 src1 : S3(read);
6639 src2 : S3(read);
6640 mem : S3(read);
6641 DECODE : S0(2); // any decoder for FPU PUSH
6642 D0 : S1; // big decoder only
6643 FPU : S4;
6644 MEM : S3; // any mem
6645 %}
6646
6647 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6648 %{
6649 instruction_count(3);
6650 src1 : S3(read);
6651 src2 : S3(read);
6652 mem : S4(read);
6653 DECODE : S0; // any decoder for FPU PUSH
6654 D0 : S0(2); // big decoder only
6655 FPU : S4;
6656 MEM : S3(2); // any mem
6657 %}
6658
6659 pipe_class fpu_mem_mem(memory dst, memory src1)
6660 %{
6661 instruction_count(2);
6662 src1 : S3(read);
6663 dst : S4(read);
6664 D0 : S0(2); // big decoder only
6665 MEM : S3(2); // any mem
6666 %}
6667
6668 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6669 %{
6670 instruction_count(3);
6671 src1 : S3(read);
6672 src2 : S3(read);
6673 dst : S4(read);
6674 D0 : S0(3); // big decoder only
6675 FPU : S4;
6676 MEM : S3(3); // any mem
6677 %}
6678
6679 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6680 %{
6681 instruction_count(3);
6682 src1 : S4(read);
6683 mem : S4(read);
6684 DECODE : S0; // any decoder for FPU PUSH
6685 D0 : S0(2); // big decoder only
6686 FPU : S4;
6687 MEM : S3(2); // any mem
6688 %}
6689
6690 // Float load constant
6691 pipe_class fpu_reg_con(regD dst)
6692 %{
6693 instruction_count(2);
6694 dst : S5(write);
6695 D0 : S0; // big decoder only for the load
6696 DECODE : S1; // any decoder for FPU POP
6697 FPU : S4;
6698 MEM : S3; // any mem
6699 %}
6700
6701 // Float load constant
6702 pipe_class fpu_reg_reg_con(regD dst, regD src)
6703 %{
6704 instruction_count(3);
6705 dst : S5(write);
6706 src : S3(read);
6707 D0 : S0; // big decoder only for the load
6708 DECODE : S1(2); // any decoder for FPU POP
6709 FPU : S4;
6710 MEM : S3; // any mem
6711 %}
6712
// Unconditional branch
6714 pipe_class pipe_jmp(label labl)
6715 %{
6716 single_instruction;
6717 BR : S3;
6718 %}
6719
6720 // Conditional branch
6721 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6722 %{
6723 single_instruction;
6724 cr : S1(read);
6725 BR : S3;
6726 %}
6727
6728 // Allocation idiom
6729 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6730 %{
6731 instruction_count(1); force_serialization;
6732 fixed_latency(6);
6733 heap_ptr : S3(read);
6734 DECODE : S0(3);
6735 D0 : S2;
6736 MEM : S3;
6737 ALU : S3(2);
6738 dst : S5(write);
6739 BR : S5;
6740 %}
6741
6742 // Generic big/slow expanded idiom
6743 pipe_class pipe_slow()
6744 %{
6745 instruction_count(10); multiple_bundles; force_serialization;
6746 fixed_latency(100);
6747 D0 : S0(2);
6748 MEM : S3(2);
6749 %}
6750
6751 // The real do-nothing guy
6752 pipe_class empty()
6753 %{
6754 instruction_count(0);
6755 %}
6756
6757 // Define the class for the Nop node
6758 define
6759 %{
6760 MachNop = empty;
6761 %}
6762
6763 %}
6764
6765 //----------INSTRUCTIONS-------------------------------------------------------
6766 //
6767 // match -- States which machine-independent subtree may be replaced
6768 // by this instruction.
6769 // ins_cost -- The estimated cost of this instruction is used by instruction
6770 // selection to identify a minimum cost tree of machine
6771 // instructions that matches a tree of machine-independent
6772 // instructions.
6773 // format -- A string providing the disassembly for this instruction.
6774 // The value of an instruction's operand may be inserted
6775 // by referring to it with a '$' prefix.
6776 // opcode -- Three instruction opcodes may be provided. These are referred
6777 // to within an encode class as $primary, $secondary, and $tertiary
//              respectively. The primary opcode is commonly used to
6779 // indicate the type of machine instruction, while secondary
6780 // and tertiary are often used for prefix options or addressing
6781 // modes.
6782 // ins_encode -- A list of encode classes with parameters. The encode class
6783 // name must have been defined in an 'enc_class' specification
6784 // in the encode section of the architecture description.
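//
// For example (an illustrative sketch only -- the authoritative rules for
// each opcode appear below), an integer reg-reg add is expressed as:
//
//   instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
//   %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "addl $dst, $src\t# int" %}
//     ins_encode %{
//       __ addl($dst$$Register, $src$$Register);
//     %}
//     ins_pipe(ialu_reg_reg);
//   %}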
6785
6786 // ============================================================================
6787
6788 instruct ShouldNotReachHere() %{
6789 match(Halt);
6790 format %{ "stop\t# ShouldNotReachHere" %}
6791 ins_encode %{
6792 if (is_reachable()) {
6793 const char* str = __ code_string(_halt_reason);
6794 __ stop(str);
6795 }
6796 %}
6797 ins_pipe(pipe_slow);
6798 %}
6799
6800 // ============================================================================
6801
6802 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6803 // Load Float
6804 instruct MoveF2VL(vlRegF dst, regF src) %{
6805 match(Set dst src);
6806 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6807 ins_encode %{
6808 ShouldNotReachHere();
6809 %}
6810 ins_pipe( fpu_reg_reg );
6811 %}
6812
6813 // Load Float
6814 instruct MoveF2LEG(legRegF dst, regF src) %{
6815 match(Set dst src);
6816 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6817 ins_encode %{
6818 ShouldNotReachHere();
6819 %}
6820 ins_pipe( fpu_reg_reg );
6821 %}
6822
6823 // Load Float
6824 instruct MoveVL2F(regF dst, vlRegF src) %{
6825 match(Set dst src);
6826 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6827 ins_encode %{
6828 ShouldNotReachHere();
6829 %}
6830 ins_pipe( fpu_reg_reg );
6831 %}
6832
6833 // Load Float
6834 instruct MoveLEG2F(regF dst, legRegF src) %{
6835 match(Set dst src);
6836 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6837 ins_encode %{
6838 ShouldNotReachHere();
6839 %}
6840 ins_pipe( fpu_reg_reg );
6841 %}
6842
6843 // Load Double
6844 instruct MoveD2VL(vlRegD dst, regD src) %{
6845 match(Set dst src);
6846 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6847 ins_encode %{
6848 ShouldNotReachHere();
6849 %}
6850 ins_pipe( fpu_reg_reg );
6851 %}
6852
6853 // Load Double
6854 instruct MoveD2LEG(legRegD dst, regD src) %{
6855 match(Set dst src);
6856 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6857 ins_encode %{
6858 ShouldNotReachHere();
6859 %}
6860 ins_pipe( fpu_reg_reg );
6861 %}
6862
6863 // Load Double
6864 instruct MoveVL2D(regD dst, vlRegD src) %{
6865 match(Set dst src);
6866 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6867 ins_encode %{
6868 ShouldNotReachHere();
6869 %}
6870 ins_pipe( fpu_reg_reg );
6871 %}
6872
6873 // Load Double
6874 instruct MoveLEG2D(regD dst, legRegD src) %{
6875 match(Set dst src);
6876 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6877 ins_encode %{
6878 ShouldNotReachHere();
6879 %}
6880 ins_pipe( fpu_reg_reg );
6881 %}
6882
6883 //----------Load/Store/Move Instructions---------------------------------------
6884 //----------Load Instructions--------------------------------------------------
6885
6886 // Load Byte (8 bit signed)
6887 instruct loadB(rRegI dst, memory mem)
6888 %{
6889 match(Set dst (LoadB mem));
6890
6891 ins_cost(125);
6892 format %{ "movsbl $dst, $mem\t# byte" %}
6893
6894 ins_encode %{
6895 __ movsbl($dst$$Register, $mem$$Address);
6896 %}
6897
6898 ins_pipe(ialu_reg_mem);
6899 %}
6900
6901 // Load Byte (8 bit signed) into Long Register
6902 instruct loadB2L(rRegL dst, memory mem)
6903 %{
6904 match(Set dst (ConvI2L (LoadB mem)));
6905
6906 ins_cost(125);
6907 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6908
6909 ins_encode %{
6910 __ movsbq($dst$$Register, $mem$$Address);
6911 %}
6912
6913 ins_pipe(ialu_reg_mem);
6914 %}
6915
6916 // Load Unsigned Byte (8 bit UNsigned)
6917 instruct loadUB(rRegI dst, memory mem)
6918 %{
6919 match(Set dst (LoadUB mem));
6920
6921 ins_cost(125);
6922 format %{ "movzbl $dst, $mem\t# ubyte" %}
6923
6924 ins_encode %{
6925 __ movzbl($dst$$Register, $mem$$Address);
6926 %}
6927
6928 ins_pipe(ialu_reg_mem);
6929 %}
6930
6931 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6932 instruct loadUB2L(rRegL dst, memory mem)
6933 %{
6934 match(Set dst (ConvI2L (LoadUB mem)));
6935
6936 ins_cost(125);
6937 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6938
6939 ins_encode %{
6940 __ movzbq($dst$$Register, $mem$$Address);
6941 %}
6942
6943 ins_pipe(ialu_reg_mem);
6944 %}
6945
6946 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6947 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6948 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6949 effect(KILL cr);
6950
6951 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6952 "andl $dst, right_n_bits($mask, 8)" %}
6953 ins_encode %{
6954 Register Rdst = $dst$$Register;
6955 __ movzbq(Rdst, $mem$$Address);
6956 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6957 %}
6958 ins_pipe(ialu_reg_mem);
6959 %}
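// Worked example (illustrative): for mask == 0x3F this rule emits
//   movzbq dst, mem    // zero-extend the byte; bits 8..63 cleared
//   andl   dst, 0x3F   // 0x3F & right_n_bits(8) == 0x3F
// The 32-bit andl implicitly zeroes bits 32..63 again, so the long result
// is exactly (loaded_byte & 0x3F).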
6960
6961 // Load Short (16 bit signed)
6962 instruct loadS(rRegI dst, memory mem)
6963 %{
6964 match(Set dst (LoadS mem));
6965
6966 ins_cost(125);
6967 format %{ "movswl $dst, $mem\t# short" %}
6968
6969 ins_encode %{
6970 __ movswl($dst$$Register, $mem$$Address);
6971 %}
6972
6973 ins_pipe(ialu_reg_mem);
6974 %}
6975
6976 // Load Short (16 bit signed) to Byte (8 bit signed)
6977 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6978 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6979
6980 ins_cost(125);
6981 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6982 ins_encode %{
6983 __ movsbl($dst$$Register, $mem$$Address);
6984 %}
6985 ins_pipe(ialu_reg_mem);
6986 %}
6987
6988 // Load Short (16 bit signed) into Long Register
6989 instruct loadS2L(rRegL dst, memory mem)
6990 %{
6991 match(Set dst (ConvI2L (LoadS mem)));
6992
6993 ins_cost(125);
6994 format %{ "movswq $dst, $mem\t# short -> long" %}
6995
6996 ins_encode %{
6997 __ movswq($dst$$Register, $mem$$Address);
6998 %}
6999
7000 ins_pipe(ialu_reg_mem);
7001 %}
7002
7003 // Load Unsigned Short/Char (16 bit UNsigned)
7004 instruct loadUS(rRegI dst, memory mem)
7005 %{
7006 match(Set dst (LoadUS mem));
7007
7008 ins_cost(125);
7009 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7010
7011 ins_encode %{
7012 __ movzwl($dst$$Register, $mem$$Address);
7013 %}
7014
7015 ins_pipe(ialu_reg_mem);
7016 %}
7017
7018 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7019 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7020 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7021
7022 ins_cost(125);
7023 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7024 ins_encode %{
7025 __ movsbl($dst$$Register, $mem$$Address);
7026 %}
7027 ins_pipe(ialu_reg_mem);
7028 %}
7029
7030 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7031 instruct loadUS2L(rRegL dst, memory mem)
7032 %{
7033 match(Set dst (ConvI2L (LoadUS mem)));
7034
7035 ins_cost(125);
7036 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7037
7038 ins_encode %{
7039 __ movzwq($dst$$Register, $mem$$Address);
7040 %}
7041
7042 ins_pipe(ialu_reg_mem);
7043 %}
7044
7045 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7046 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7047 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7048
7049 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7050 ins_encode %{
7051 __ movzbq($dst$$Register, $mem$$Address);
7052 %}
7053 ins_pipe(ialu_reg_mem);
7054 %}
7055
7056 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7057 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7058 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7059 effect(KILL cr);
7060
7061 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7062 "andl $dst, right_n_bits($mask, 16)" %}
7063 ins_encode %{
7064 Register Rdst = $dst$$Register;
7065 __ movzwq(Rdst, $mem$$Address);
7066 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7067 %}
7068 ins_pipe(ialu_reg_mem);
7069 %}
7070
7071 // Load Integer
7072 instruct loadI(rRegI dst, memory mem)
7073 %{
7074 match(Set dst (LoadI mem));
7075
7076 ins_cost(125);
7077 format %{ "movl $dst, $mem\t# int" %}
7078
7079 ins_encode %{
7080 __ movl($dst$$Register, $mem$$Address);
7081 %}
7082
7083 ins_pipe(ialu_reg_mem);
7084 %}
7085
7086 // Load Integer (32 bit signed) to Byte (8 bit signed)
7087 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7088 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7089
7090 ins_cost(125);
7091 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7092 ins_encode %{
7093 __ movsbl($dst$$Register, $mem$$Address);
7094 %}
7095 ins_pipe(ialu_reg_mem);
7096 %}
7097
7098 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7099 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7100 match(Set dst (AndI (LoadI mem) mask));
7101
7102 ins_cost(125);
7103 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7104 ins_encode %{
7105 __ movzbl($dst$$Register, $mem$$Address);
7106 %}
7107 ins_pipe(ialu_reg_mem);
7108 %}
7109
7110 // Load Integer (32 bit signed) to Short (16 bit signed)
7111 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7112 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7113
7114 ins_cost(125);
7115 format %{ "movswl $dst, $mem\t# int -> short" %}
7116 ins_encode %{
7117 __ movswl($dst$$Register, $mem$$Address);
7118 %}
7119 ins_pipe(ialu_reg_mem);
7120 %}
7121
7122 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7123 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7124 match(Set dst (AndI (LoadI mem) mask));
7125
7126 ins_cost(125);
7127 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7128 ins_encode %{
7129 __ movzwl($dst$$Register, $mem$$Address);
7130 %}
7131 ins_pipe(ialu_reg_mem);
7132 %}
7133
7134 // Load Integer into Long Register
7135 instruct loadI2L(rRegL dst, memory mem)
7136 %{
7137 match(Set dst (ConvI2L (LoadI mem)));
7138
7139 ins_cost(125);
7140 format %{ "movslq $dst, $mem\t# int -> long" %}
7141
7142 ins_encode %{
7143 __ movslq($dst$$Register, $mem$$Address);
7144 %}
7145
7146 ins_pipe(ialu_reg_mem);
7147 %}
7148
7149 // Load Integer with mask 0xFF into Long Register
7150 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7151 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7152
7153 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7154 ins_encode %{
7155 __ movzbq($dst$$Register, $mem$$Address);
7156 %}
7157 ins_pipe(ialu_reg_mem);
7158 %}
7159
7160 // Load Integer with mask 0xFFFF into Long Register
7161 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7162 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7163
7164 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7165 ins_encode %{
7166 __ movzwq($dst$$Register, $mem$$Address);
7167 %}
7168 ins_pipe(ialu_reg_mem);
7169 %}
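// Taken together, the two rules above map the common Java idioms
//   long lo = i & 0xFF;    // ConvI2L(AndI(LoadI, 0xFF))   => movzbq
//   long ch = i & 0xFFFF;  // ConvI2L(AndI(LoadI, 0xFFFF)) => movzwq
// onto a single zero-extending load, with no separate and/convert.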
7170
7171 // Load Integer with a 31-bit mask into Long Register
7172 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7173 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7174 effect(KILL cr);
7175
7176 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7177 "andl $dst, $mask" %}
7178 ins_encode %{
7179 Register Rdst = $dst$$Register;
7180 __ movl(Rdst, $mem$$Address);
7181 __ andl(Rdst, $mask$$constant);
7182 %}
7183 ins_pipe(ialu_reg_mem);
7184 %}
7185
7186 // Load Unsigned Integer into Long Register
7187 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7188 %{
7189 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7190
7191 ins_cost(125);
7192 format %{ "movl $dst, $mem\t# uint -> long" %}
7193
7194 ins_encode %{
7195 __ movl($dst$$Register, $mem$$Address);
7196 %}
7197
7198 ins_pipe(ialu_reg_mem);
7199 %}
7200
7201 // Load Long
7202 instruct loadL(rRegL dst, memory mem)
7203 %{
7204 match(Set dst (LoadL mem));
7205
7206 ins_cost(125);
7207 format %{ "movq $dst, $mem\t# long" %}
7208
7209 ins_encode %{
7210 __ movq($dst$$Register, $mem$$Address);
7211 %}
7212
7213 ins_pipe(ialu_reg_mem); // XXX
7214 %}
7215
7216 // Load Range
7217 instruct loadRange(rRegI dst, memory mem)
7218 %{
7219 match(Set dst (LoadRange mem));
7220
7221 ins_cost(125); // XXX
7222 format %{ "movl $dst, $mem\t# range" %}
7223 ins_encode %{
7224 __ movl($dst$$Register, $mem$$Address);
7225 %}
7226 ins_pipe(ialu_reg_mem);
7227 %}
7228
7229 // Load Pointer
7230 instruct loadP(rRegP dst, memory mem)
7231 %{
7232 match(Set dst (LoadP mem));
7233 predicate(n->as_Load()->barrier_data() == 0);
7234
7235 ins_cost(125); // XXX
7236 format %{ "movq $dst, $mem\t# ptr" %}
7237 ins_encode %{
7238 __ movq($dst$$Register, $mem$$Address);
7239 %}
7240 ins_pipe(ialu_reg_mem); // XXX
7241 %}
7242
7243 // Load Compressed Pointer
7244 instruct loadN(rRegN dst, memory mem)
7245 %{
7246 predicate(n->as_Load()->barrier_data() == 0);
7247 match(Set dst (LoadN mem));
7248
7249 ins_cost(125); // XXX
7250 format %{ "movl $dst, $mem\t# compressed ptr" %}
7251 ins_encode %{
7252 __ movl($dst$$Register, $mem$$Address);
7253 %}
7254 ins_pipe(ialu_reg_mem); // XXX
7255 %}
7256
7257
7258 // Load Klass Pointer
7259 instruct loadKlass(rRegP dst, memory mem)
7260 %{
7261 match(Set dst (LoadKlass mem));
7262
7263 ins_cost(125); // XXX
7264 format %{ "movq $dst, $mem\t# class" %}
7265 ins_encode %{
7266 __ movq($dst$$Register, $mem$$Address);
7267 %}
7268 ins_pipe(ialu_reg_mem); // XXX
7269 %}
7270
7271 // Load narrow Klass Pointer
7272 instruct loadNKlass(rRegN dst, memory mem)
7273 %{
7274 predicate(!UseCompactObjectHeaders);
7275 match(Set dst (LoadNKlass mem));
7276
7277 ins_cost(125); // XXX
7278 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7279 ins_encode %{
7280 __ movl($dst$$Register, $mem$$Address);
7281 %}
7282 ins_pipe(ialu_reg_mem); // XXX
7283 %}
7284
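// With compact object headers (JEP 450) the narrow klass pointer lives in
// the upper bits of the object's mark word rather than in a separate header
// field, so the rule below loads 32 bits and shifts the klass bits down.
// On APX-capable CPUs the load and shift fuse into a single eshrl with a
// memory source.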
7285 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7286 %{
7287 predicate(UseCompactObjectHeaders);
7288 match(Set dst (LoadNKlass mem));
7289 effect(KILL cr);
7290 ins_cost(125);
7291 format %{
7292 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7293 "shrl $dst, markWord::klass_shift_at_offset"
7294 %}
7295 ins_encode %{
    if (UseAPX) {
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    } else {
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    }
7303 %}
7304 ins_pipe(ialu_reg_mem);
7305 %}
7306
7307 // Load Float
7308 instruct loadF(regF dst, memory mem)
7309 %{
7310 match(Set dst (LoadF mem));
7311
7312 ins_cost(145); // XXX
7313 format %{ "movss $dst, $mem\t# float" %}
7314 ins_encode %{
7315 __ movflt($dst$$XMMRegister, $mem$$Address);
7316 %}
7317 ins_pipe(pipe_slow); // XXX
7318 %}
7319
7320 // Load Double
7321 instruct loadD_partial(regD dst, memory mem)
7322 %{
7323 predicate(!UseXmmLoadAndClearUpper);
7324 match(Set dst (LoadD mem));
7325
7326 ins_cost(145); // XXX
7327 format %{ "movlpd $dst, $mem\t# double" %}
7328 ins_encode %{
7329 __ movdbl($dst$$XMMRegister, $mem$$Address);
7330 %}
7331 ins_pipe(pipe_slow); // XXX
7332 %}
7333
7334 instruct loadD(regD dst, memory mem)
7335 %{
7336 predicate(UseXmmLoadAndClearUpper);
7337 match(Set dst (LoadD mem));
7338
7339 ins_cost(145); // XXX
7340 format %{ "movsd $dst, $mem\t# double" %}
7341 ins_encode %{
7342 __ movdbl($dst$$XMMRegister, $mem$$Address);
7343 %}
7344 ins_pipe(pipe_slow); // XXX
7345 %}
7346
7347 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7348 %{
7349 match(Set dst con);
7350
7351 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7352
7353 ins_encode %{
7354 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7355 %}
7356
7357 ins_pipe(ialu_reg_fat);
7358 %}
7359
7360 // max = java.lang.Math.max(float a, float b)
7361 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7362 predicate(VM_Version::supports_avx10_2());
7363 match(Set dst (MaxF a b));
7364 format %{ "maxF $dst, $a, $b" %}
7365 ins_encode %{
7366 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7367 %}
7368 ins_pipe( pipe_slow );
7369 %}
7370
7371 // max = java.lang.Math.max(float a, float b)
7372 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7373 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7374 match(Set dst (MaxF a b));
7375 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7376 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7377 ins_encode %{
7378 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7379 %}
7380 ins_pipe( pipe_slow );
7381 %}
7382
7383 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7384 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7385 match(Set dst (MaxF a b));
7386 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7387
  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7389 ins_encode %{
7390 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7391 false /*min*/, true /*single*/);
7392 %}
7393 ins_pipe( pipe_slow );
7394 %}
7395
7396 // max = java.lang.Math.max(double a, double b)
7397 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7398 predicate(VM_Version::supports_avx10_2());
7399 match(Set dst (MaxD a b));
7400 format %{ "maxD $dst, $a, $b" %}
7401 ins_encode %{
7402 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7403 %}
7404 ins_pipe( pipe_slow );
7405 %}
7406
7407 // max = java.lang.Math.max(double a, double b)
7408 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7409 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7410 match(Set dst (MaxD a b));
7411 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7412 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7413 ins_encode %{
7414 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7415 %}
7416 ins_pipe( pipe_slow );
7417 %}
7418
7419 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7420 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7421 match(Set dst (MaxD a b));
7422 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7423
7424 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7425 ins_encode %{
7426 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7427 false /*min*/, false /*single*/);
7428 %}
7429 ins_pipe( pipe_slow );
7430 %}
7431
// min = java.lang.Math.min(float a, float b)
7433 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7434 predicate(VM_Version::supports_avx10_2());
7435 match(Set dst (MinF a b));
7436 format %{ "minF $dst, $a, $b" %}
7437 ins_encode %{
7438 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7439 %}
7440 ins_pipe( pipe_slow );
7441 %}
7442
7443 // min = java.lang.Math.min(float a, float b)
7444 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7445 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7446 match(Set dst (MinF a b));
7447 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7448 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7449 ins_encode %{
7450 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7451 %}
7452 ins_pipe( pipe_slow );
7453 %}
7454
7455 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7456 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7457 match(Set dst (MinF a b));
7458 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7459
7460 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7461 ins_encode %{
7462 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7463 true /*min*/, true /*single*/);
7464 %}
7465 ins_pipe( pipe_slow );
7466 %}
7467
// min = java.lang.Math.min(double a, double b)
7469 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7470 predicate(VM_Version::supports_avx10_2());
7471 match(Set dst (MinD a b));
7472 format %{ "minD $dst, $a, $b" %}
7473 ins_encode %{
7474 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7475 %}
7476 ins_pipe( pipe_slow );
7477 %}
7478
7479 // min = java.lang.Math.min(double a, double b)
7480 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7481 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7482 match(Set dst (MinD a b));
7483 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7484 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7485 ins_encode %{
7486 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7487 %}
7488 ins_pipe( pipe_slow );
7489 %}
7490
7491 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7492 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7493 match(Set dst (MinD a b));
7494 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7495
  format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7497 ins_encode %{
7498 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7499 true /*min*/, false /*single*/);
7500 %}
7501 ins_pipe( pipe_slow );
7502 %}
7503
7504 // Load Effective Address
7505 instruct leaP8(rRegP dst, indOffset8 mem)
7506 %{
7507 match(Set dst mem);
7508
7509 ins_cost(110); // XXX
7510 format %{ "leaq $dst, $mem\t# ptr 8" %}
7511 ins_encode %{
7512 __ leaq($dst$$Register, $mem$$Address);
7513 %}
7514 ins_pipe(ialu_reg_reg_fat);
7515 %}
7516
7517 instruct leaP32(rRegP dst, indOffset32 mem)
7518 %{
7519 match(Set dst mem);
7520
7521 ins_cost(110);
7522 format %{ "leaq $dst, $mem\t# ptr 32" %}
7523 ins_encode %{
7524 __ leaq($dst$$Register, $mem$$Address);
7525 %}
7526 ins_pipe(ialu_reg_reg_fat);
7527 %}
7528
7529 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7530 %{
7531 match(Set dst mem);
7532
7533 ins_cost(110);
7534 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7535 ins_encode %{
7536 __ leaq($dst$$Register, $mem$$Address);
7537 %}
7538 ins_pipe(ialu_reg_reg_fat);
7539 %}
7540
7541 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7542 %{
7543 match(Set dst mem);
7544
7545 ins_cost(110);
7546 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7547 ins_encode %{
7548 __ leaq($dst$$Register, $mem$$Address);
7549 %}
7550 ins_pipe(ialu_reg_reg_fat);
7551 %}
7552
7553 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7554 %{
7555 match(Set dst mem);
7556
7557 ins_cost(110);
7558 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7559 ins_encode %{
7560 __ leaq($dst$$Register, $mem$$Address);
7561 %}
7562 ins_pipe(ialu_reg_reg_fat);
7563 %}
7564
7565 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7566 %{
7567 match(Set dst mem);
7568
7569 ins_cost(110);
7570 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7571 ins_encode %{
7572 __ leaq($dst$$Register, $mem$$Address);
7573 %}
7574 ins_pipe(ialu_reg_reg_fat);
7575 %}
7576
7577 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7578 %{
7579 match(Set dst mem);
7580
7581 ins_cost(110);
7582 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7583 ins_encode %{
7584 __ leaq($dst$$Register, $mem$$Address);
7585 %}
7586 ins_pipe(ialu_reg_reg_fat);
7587 %}
7588
7589 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7590 %{
7591 match(Set dst mem);
7592
7593 ins_cost(110);
7594 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7595 ins_encode %{
7596 __ leaq($dst$$Register, $mem$$Address);
7597 %}
7598 ins_pipe(ialu_reg_reg_fat);
7599 %}
7600
// Load Effective Address which uses a narrow (32-bit) oop
7602 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7603 %{
7604 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7605 match(Set dst mem);
7606
7607 ins_cost(110);
7608 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7609 ins_encode %{
7610 __ leaq($dst$$Register, $mem$$Address);
7611 %}
7612 ins_pipe(ialu_reg_reg_fat);
7613 %}
7614
7615 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7616 %{
7617 predicate(CompressedOops::shift() == 0);
7618 match(Set dst mem);
7619
7620 ins_cost(110); // XXX
7621 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7622 ins_encode %{
7623 __ leaq($dst$$Register, $mem$$Address);
7624 %}
7625 ins_pipe(ialu_reg_reg_fat);
7626 %}
7627
7628 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7629 %{
7630 predicate(CompressedOops::shift() == 0);
7631 match(Set dst mem);
7632
7633 ins_cost(110);
7634 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7635 ins_encode %{
7636 __ leaq($dst$$Register, $mem$$Address);
7637 %}
7638 ins_pipe(ialu_reg_reg_fat);
7639 %}
7640
7641 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7642 %{
7643 predicate(CompressedOops::shift() == 0);
7644 match(Set dst mem);
7645
7646 ins_cost(110);
7647 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7648 ins_encode %{
7649 __ leaq($dst$$Register, $mem$$Address);
7650 %}
7651 ins_pipe(ialu_reg_reg_fat);
7652 %}
7653
7654 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7655 %{
7656 predicate(CompressedOops::shift() == 0);
7657 match(Set dst mem);
7658
7659 ins_cost(110);
7660 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7661 ins_encode %{
7662 __ leaq($dst$$Register, $mem$$Address);
7663 %}
7664 ins_pipe(ialu_reg_reg_fat);
7665 %}
7666
7667 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7668 %{
7669 predicate(CompressedOops::shift() == 0);
7670 match(Set dst mem);
7671
7672 ins_cost(110);
7673 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7674 ins_encode %{
7675 __ leaq($dst$$Register, $mem$$Address);
7676 %}
7677 ins_pipe(ialu_reg_reg_fat);
7678 %}
7679
7680 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7681 %{
7682 predicate(CompressedOops::shift() == 0);
7683 match(Set dst mem);
7684
7685 ins_cost(110);
7686 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7687 ins_encode %{
7688 __ leaq($dst$$Register, $mem$$Address);
7689 %}
7690 ins_pipe(ialu_reg_reg_fat);
7691 %}
7692
7693 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7694 %{
7695 predicate(CompressedOops::shift() == 0);
7696 match(Set dst mem);
7697
7698 ins_cost(110);
7699 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7700 ins_encode %{
7701 __ leaq($dst$$Register, $mem$$Address);
7702 %}
7703 ins_pipe(ialu_reg_reg_fat);
7704 %}
7705
7706 instruct loadConI(rRegI dst, immI src)
7707 %{
7708 match(Set dst src);
7709
7710 format %{ "movl $dst, $src\t# int" %}
7711 ins_encode %{
7712 __ movl($dst$$Register, $src$$constant);
7713 %}
7714 ins_pipe(ialu_reg_fat); // XXX
7715 %}
7716
7717 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7718 %{
7719 match(Set dst src);
7720 effect(KILL cr);
7721
7722 ins_cost(50);
7723 format %{ "xorl $dst, $dst\t# int" %}
7724 ins_encode %{
7725 __ xorl($dst$$Register, $dst$$Register);
7726 %}
7727 ins_pipe(ialu_reg);
7728 %}
7729
7730 instruct loadConL(rRegL dst, immL src)
7731 %{
7732 match(Set dst src);
7733
7734 ins_cost(150);
7735 format %{ "movq $dst, $src\t# long" %}
7736 ins_encode %{
7737 __ mov64($dst$$Register, $src$$constant);
7738 %}
7739 ins_pipe(ialu_reg);
7740 %}
7741
7742 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7743 %{
7744 match(Set dst src);
7745 effect(KILL cr);
7746
7747 ins_cost(50);
7748 format %{ "xorl $dst, $dst\t# long" %}
7749 ins_encode %{
7750 __ xorl($dst$$Register, $dst$$Register);
7751 %}
7752 ins_pipe(ialu_reg); // XXX
7753 %}
7754
7755 instruct loadConUL32(rRegL dst, immUL32 src)
7756 %{
7757 match(Set dst src);
7758
7759 ins_cost(60);
7760 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7761 ins_encode %{
7762 __ movl($dst$$Register, $src$$constant);
7763 %}
7764 ins_pipe(ialu_reg);
7765 %}
7766
7767 instruct loadConL32(rRegL dst, immL32 src)
7768 %{
7769 match(Set dst src);
7770
7771 ins_cost(70);
7772 format %{ "movq $dst, $src\t# long (32-bit)" %}
7773 ins_encode %{
7774 __ movq($dst$$Register, $src$$constant);
7775 %}
7776 ins_pipe(ialu_reg);
7777 %}
7778
7779 instruct loadConP(rRegP dst, immP con) %{
7780 match(Set dst con);
7781
7782 format %{ "movq $dst, $con\t# ptr" %}
7783 ins_encode %{
7784 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7785 %}
7786 ins_pipe(ialu_reg_fat); // XXX
7787 %}
7788
7789 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7790 %{
7791 match(Set dst src);
7792 effect(KILL cr);
7793
7794 ins_cost(50);
7795 format %{ "xorl $dst, $dst\t# ptr" %}
7796 ins_encode %{
7797 __ xorl($dst$$Register, $dst$$Register);
7798 %}
7799 ins_pipe(ialu_reg);
7800 %}
7801
7802 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7803 %{
7804 match(Set dst src);
7805 effect(KILL cr);
7806
7807 ins_cost(60);
7808 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7809 ins_encode %{
7810 __ movl($dst$$Register, $src$$constant);
7811 %}
7812 ins_pipe(ialu_reg);
7813 %}
7814
7815 instruct loadConF(regF dst, immF con) %{
7816 match(Set dst con);
7817 ins_cost(125);
7818 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7819 ins_encode %{
7820 __ movflt($dst$$XMMRegister, $constantaddress($con));
7821 %}
7822 ins_pipe(pipe_slow);
7823 %}
7824
7825 instruct loadConH(regF dst, immH con) %{
7826 match(Set dst con);
7827 ins_cost(125);
7828 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7829 ins_encode %{
7830 __ movflt($dst$$XMMRegister, $constantaddress($con));
7831 %}
7832 ins_pipe(pipe_slow);
7833 %}
7834
7835 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7836 match(Set dst src);
7837 effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
7839 ins_encode %{
7840 __ xorq($dst$$Register, $dst$$Register);
7841 %}
7842 ins_pipe(ialu_reg);
7843 %}
7844
7845 instruct loadConN(rRegN dst, immN src) %{
7846 match(Set dst src);
7847
7848 ins_cost(125);
7849 format %{ "movl $dst, $src\t# compressed ptr" %}
7850 ins_encode %{
7851 address con = (address)$src$$constant;
7852 if (con == nullptr) {
7853 ShouldNotReachHere();
7854 } else {
7855 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7856 }
7857 %}
7858 ins_pipe(ialu_reg_fat); // XXX
7859 %}
7860
7861 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7862 match(Set dst src);
7863
7864 ins_cost(125);
7865 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7866 ins_encode %{
7867 address con = (address)$src$$constant;
7868 if (con == nullptr) {
7869 ShouldNotReachHere();
7870 } else {
7871 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7872 }
7873 %}
7874 ins_pipe(ialu_reg_fat); // XXX
7875 %}
7876
7877 instruct loadConF0(regF dst, immF0 src)
7878 %{
7879 match(Set dst src);
7880 ins_cost(100);
7881
7882 format %{ "xorps $dst, $dst\t# float 0.0" %}
7883 ins_encode %{
7884 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7885 %}
7886 ins_pipe(pipe_slow);
7887 %}
7888
// Use the same format since predicate() cannot be used here.
7890 instruct loadConD(regD dst, immD con) %{
7891 match(Set dst con);
7892 ins_cost(125);
7893 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7894 ins_encode %{
7895 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7896 %}
7897 ins_pipe(pipe_slow);
7898 %}
7899
7900 instruct loadConD0(regD dst, immD0 src)
7901 %{
7902 match(Set dst src);
7903 ins_cost(100);
7904
7905 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7906 ins_encode %{
7907 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7908 %}
7909 ins_pipe(pipe_slow);
7910 %}
7911
7912 instruct loadSSI(rRegI dst, stackSlotI src)
7913 %{
7914 match(Set dst src);
7915
7916 ins_cost(125);
7917 format %{ "movl $dst, $src\t# int stk" %}
7918 ins_encode %{
7919 __ movl($dst$$Register, $src$$Address);
7920 %}
7921 ins_pipe(ialu_reg_mem);
7922 %}
7923
7924 instruct loadSSL(rRegL dst, stackSlotL src)
7925 %{
7926 match(Set dst src);
7927
7928 ins_cost(125);
7929 format %{ "movq $dst, $src\t# long stk" %}
7930 ins_encode %{
7931 __ movq($dst$$Register, $src$$Address);
7932 %}
7933 ins_pipe(ialu_reg_mem);
7934 %}
7935
7936 instruct loadSSP(rRegP dst, stackSlotP src)
7937 %{
7938 match(Set dst src);
7939
7940 ins_cost(125);
7941 format %{ "movq $dst, $src\t# ptr stk" %}
7942 ins_encode %{
7943 __ movq($dst$$Register, $src$$Address);
7944 %}
7945 ins_pipe(ialu_reg_mem);
7946 %}
7947
7948 instruct loadSSF(regF dst, stackSlotF src)
7949 %{
7950 match(Set dst src);
7951
7952 ins_cost(125);
7953 format %{ "movss $dst, $src\t# float stk" %}
7954 ins_encode %{
7955 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7956 %}
7957 ins_pipe(pipe_slow); // XXX
7958 %}
7959
// Use the same format since predicate() cannot be used here.
7961 instruct loadSSD(regD dst, stackSlotD src)
7962 %{
7963 match(Set dst src);
7964
7965 ins_cost(125);
7966 format %{ "movsd $dst, $src\t# double stk" %}
7967 ins_encode %{
7968 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7969 %}
7970 ins_pipe(pipe_slow); // XXX
7971 %}
7972
7973 // Prefetch instructions for allocation.
7974 // Must be safe to execute with invalid address (cannot fault).
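
// AllocatePrefetchInstr selects the flavor, one rule per value:
//   0 => prefetchnta, 1 => prefetcht0, 2 => prefetcht2, 3 => prefetchw.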
7975
7976 instruct prefetchAlloc( memory mem ) %{
7977 predicate(AllocatePrefetchInstr==3);
7978 match(PrefetchAllocation mem);
7979 ins_cost(125);
7980
7981 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7982 ins_encode %{
7983 __ prefetchw($mem$$Address);
7984 %}
7985 ins_pipe(ialu_mem);
7986 %}
7987
7988 instruct prefetchAllocNTA( memory mem ) %{
7989 predicate(AllocatePrefetchInstr==0);
7990 match(PrefetchAllocation mem);
7991 ins_cost(125);
7992
7993 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7994 ins_encode %{
7995 __ prefetchnta($mem$$Address);
7996 %}
7997 ins_pipe(ialu_mem);
7998 %}
7999
8000 instruct prefetchAllocT0( memory mem ) %{
8001 predicate(AllocatePrefetchInstr==1);
8002 match(PrefetchAllocation mem);
8003 ins_cost(125);
8004
8005 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8006 ins_encode %{
8007 __ prefetcht0($mem$$Address);
8008 %}
8009 ins_pipe(ialu_mem);
8010 %}
8011
8012 instruct prefetchAllocT2( memory mem ) %{
8013 predicate(AllocatePrefetchInstr==2);
8014 match(PrefetchAllocation mem);
8015 ins_cost(125);
8016
8017 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8018 ins_encode %{
8019 __ prefetcht2($mem$$Address);
8020 %}
8021 ins_pipe(ialu_mem);
8022 %}
8023
8024 //----------Store Instructions-------------------------------------------------
8025
8026 // Store Byte
8027 instruct storeB(memory mem, rRegI src)
8028 %{
8029 match(Set mem (StoreB mem src));
8030
8031 ins_cost(125); // XXX
8032 format %{ "movb $mem, $src\t# byte" %}
8033 ins_encode %{
8034 __ movb($mem$$Address, $src$$Register);
8035 %}
8036 ins_pipe(ialu_mem_reg);
8037 %}
8038
8039 // Store Char/Short
8040 instruct storeC(memory mem, rRegI src)
8041 %{
8042 match(Set mem (StoreC mem src));
8043
8044 ins_cost(125); // XXX
8045 format %{ "movw $mem, $src\t# char/short" %}
8046 ins_encode %{
8047 __ movw($mem$$Address, $src$$Register);
8048 %}
8049 ins_pipe(ialu_mem_reg);
8050 %}
8051
8052 // Store Integer
8053 instruct storeI(memory mem, rRegI src)
8054 %{
8055 match(Set mem (StoreI mem src));
8056
8057 ins_cost(125); // XXX
8058 format %{ "movl $mem, $src\t# int" %}
8059 ins_encode %{
8060 __ movl($mem$$Address, $src$$Register);
8061 %}
8062 ins_pipe(ialu_mem_reg);
8063 %}
8064
8065 // Store Long
8066 instruct storeL(memory mem, rRegL src)
8067 %{
8068 match(Set mem (StoreL mem src));
8069
8070 ins_cost(125); // XXX
8071 format %{ "movq $mem, $src\t# long" %}
8072 ins_encode %{
8073 __ movq($mem$$Address, $src$$Register);
8074 %}
8075 ins_pipe(ialu_mem_reg); // XXX
8076 %}
8077
8078 // Store Pointer
8079 instruct storeP(memory mem, any_RegP src)
8080 %{
8081 predicate(n->as_Store()->barrier_data() == 0);
8082 match(Set mem (StoreP mem src));
8083
8084 ins_cost(125); // XXX
8085 format %{ "movq $mem, $src\t# ptr" %}
8086 ins_encode %{
8087 __ movq($mem$$Address, $src$$Register);
8088 %}
8089 ins_pipe(ialu_mem_reg);
8090 %}
8091
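// When compressed oops are enabled with a null heap base, r12 (the heapbase
// register) is known to hold zero, so the storeImm*0 rules below reuse it
// as a zero register: "movq $mem, r12" stores a zero without materializing
// an immediate.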
8092 instruct storeImmP0(memory mem, immP0 zero)
8093 %{
8094 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8095 match(Set mem (StoreP mem zero));
8096
8097 ins_cost(125); // XXX
8098 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8099 ins_encode %{
8100 __ movq($mem$$Address, r12);
8101 %}
8102 ins_pipe(ialu_mem_reg);
8103 %}
8104
8105 // Store Null Pointer, mark word, or other simple pointer constant.
8106 instruct storeImmP(memory mem, immP31 src)
8107 %{
8108 predicate(n->as_Store()->barrier_data() == 0);
8109 match(Set mem (StoreP mem src));
8110
8111 ins_cost(150); // XXX
8112 format %{ "movq $mem, $src\t# ptr" %}
8113 ins_encode %{
8114 __ movq($mem$$Address, $src$$constant);
8115 %}
8116 ins_pipe(ialu_mem_imm);
8117 %}
8118
8119 // Store Compressed Pointer
8120 instruct storeN(memory mem, rRegN src)
8121 %{
8122 predicate(n->as_Store()->barrier_data() == 0);
8123 match(Set mem (StoreN mem src));
8124
8125 ins_cost(125); // XXX
8126 format %{ "movl $mem, $src\t# compressed ptr" %}
8127 ins_encode %{
8128 __ movl($mem$$Address, $src$$Register);
8129 %}
8130 ins_pipe(ialu_mem_reg);
8131 %}
8132
8133 instruct storeNKlass(memory mem, rRegN src)
8134 %{
8135 match(Set mem (StoreNKlass mem src));
8136
8137 ins_cost(125); // XXX
8138 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8139 ins_encode %{
8140 __ movl($mem$$Address, $src$$Register);
8141 %}
8142 ins_pipe(ialu_mem_reg);
8143 %}
8144
8145 instruct storeImmN0(memory mem, immN0 zero)
8146 %{
8147 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8148 match(Set mem (StoreN mem zero));
8149
8150 ins_cost(125); // XXX
8151 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8152 ins_encode %{
8153 __ movl($mem$$Address, r12);
8154 %}
8155 ins_pipe(ialu_mem_reg);
8156 %}
8157
8158 instruct storeImmN(memory mem, immN src)
8159 %{
8160 predicate(n->as_Store()->barrier_data() == 0);
8161 match(Set mem (StoreN mem src));
8162
8163 ins_cost(150); // XXX
8164 format %{ "movl $mem, $src\t# compressed ptr" %}
8165 ins_encode %{
8166 address con = (address)$src$$constant;
8167 if (con == nullptr) {
8168 __ movl($mem$$Address, 0);
8169 } else {
8170 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8171 }
8172 %}
8173 ins_pipe(ialu_mem_imm);
8174 %}
8175
8176 instruct storeImmNKlass(memory mem, immNKlass src)
8177 %{
8178 match(Set mem (StoreNKlass mem src));
8179
8180 ins_cost(150); // XXX
8181 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8182 ins_encode %{
8183 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8184 %}
8185 ins_pipe(ialu_mem_imm);
8186 %}
8187
8188 // Store Integer Immediate
8189 instruct storeImmI0(memory mem, immI_0 zero)
8190 %{
8191 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8192 match(Set mem (StoreI mem zero));
8193
8194 ins_cost(125); // XXX
8195 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8196 ins_encode %{
8197 __ movl($mem$$Address, r12);
8198 %}
8199 ins_pipe(ialu_mem_reg);
8200 %}
8201
8202 instruct storeImmI(memory mem, immI src)
8203 %{
8204 match(Set mem (StoreI mem src));
8205
8206 ins_cost(150);
8207 format %{ "movl $mem, $src\t# int" %}
8208 ins_encode %{
8209 __ movl($mem$$Address, $src$$constant);
8210 %}
8211 ins_pipe(ialu_mem_imm);
8212 %}
8213
8214 // Store Long Immediate
8215 instruct storeImmL0(memory mem, immL0 zero)
8216 %{
8217 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8218 match(Set mem (StoreL mem zero));
8219
8220 ins_cost(125); // XXX
8221 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8222 ins_encode %{
8223 __ movq($mem$$Address, r12);
8224 %}
8225 ins_pipe(ialu_mem_reg);
8226 %}
8227
8228 instruct storeImmL(memory mem, immL32 src)
8229 %{
8230 match(Set mem (StoreL mem src));
8231
8232 ins_cost(150);
8233 format %{ "movq $mem, $src\t# long" %}
8234 ins_encode %{
8235 __ movq($mem$$Address, $src$$constant);
8236 %}
8237 ins_pipe(ialu_mem_imm);
8238 %}
8239
8240 // Store Short/Char Immediate
8241 instruct storeImmC0(memory mem, immI_0 zero)
8242 %{
8243 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8244 match(Set mem (StoreC mem zero));
8245
8246 ins_cost(125); // XXX
8247 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8248 ins_encode %{
8249 __ movw($mem$$Address, r12);
8250 %}
8251 ins_pipe(ialu_mem_reg);
8252 %}
8253
8254 instruct storeImmI16(memory mem, immI16 src)
8255 %{
8256 predicate(UseStoreImmI16);
8257 match(Set mem (StoreC mem src));
8258
8259 ins_cost(150);
8260 format %{ "movw $mem, $src\t# short/char" %}
8261 ins_encode %{
8262 __ movw($mem$$Address, $src$$constant);
8263 %}
8264 ins_pipe(ialu_mem_imm);
8265 %}
8266
8267 // Store Byte Immediate
8268 instruct storeImmB0(memory mem, immI_0 zero)
8269 %{
8270 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8271 match(Set mem (StoreB mem zero));
8272
8273 ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8275 ins_encode %{
8276 __ movb($mem$$Address, r12);
8277 %}
8278 ins_pipe(ialu_mem_reg);
8279 %}
8280
8281 instruct storeImmB(memory mem, immI8 src)
8282 %{
8283 match(Set mem (StoreB mem src));
8284
8285 ins_cost(150); // XXX
8286 format %{ "movb $mem, $src\t# byte" %}
8287 ins_encode %{
8288 __ movb($mem$$Address, $src$$constant);
8289 %}
8290 ins_pipe(ialu_mem_imm);
8291 %}
8292
8293 // Store Float
8294 instruct storeF(memory mem, regF src)
8295 %{
8296 match(Set mem (StoreF mem src));
8297
8298 ins_cost(95); // XXX
8299 format %{ "movss $mem, $src\t# float" %}
8300 ins_encode %{
8301 __ movflt($mem$$Address, $src$$XMMRegister);
8302 %}
8303 ins_pipe(pipe_slow); // XXX
8304 %}
8305
// Store immediate Float value (it is faster than a store from an XMM register)
8307 instruct storeF0(memory mem, immF0 zero)
8308 %{
8309 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8310 match(Set mem (StoreF mem zero));
8311
8312 ins_cost(25); // XXX
8313 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8314 ins_encode %{
8315 __ movl($mem$$Address, r12);
8316 %}
8317 ins_pipe(ialu_mem_reg);
8318 %}
8319
8320 instruct storeF_imm(memory mem, immF src)
8321 %{
8322 match(Set mem (StoreF mem src));
8323
8324 ins_cost(50);
8325 format %{ "movl $mem, $src\t# float" %}
8326 ins_encode %{
8327 __ movl($mem$$Address, jint_cast($src$$constant));
8328 %}
8329 ins_pipe(ialu_mem_imm);
8330 %}
8331
8332 // Store Double
8333 instruct storeD(memory mem, regD src)
8334 %{
8335 match(Set mem (StoreD mem src));
8336
8337 ins_cost(95); // XXX
8338 format %{ "movsd $mem, $src\t# double" %}
8339 ins_encode %{
8340 __ movdbl($mem$$Address, $src$$XMMRegister);
8341 %}
8342 ins_pipe(pipe_slow); // XXX
8343 %}
8344
// Store immediate double 0.0 (it is faster than a store from an XMM register)
8346 instruct storeD0_imm(memory mem, immD0 src)
8347 %{
8348 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8349 match(Set mem (StoreD mem src));
8350
8351 ins_cost(50);
8352 format %{ "movq $mem, $src\t# double 0." %}
8353 ins_encode %{
8354 __ movq($mem$$Address, $src$$constant);
8355 %}
8356 ins_pipe(ialu_mem_imm);
8357 %}
8358
8359 instruct storeD0(memory mem, immD0 zero)
8360 %{
8361 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8362 match(Set mem (StoreD mem zero));
8363
8364 ins_cost(25); // XXX
8365 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8366 ins_encode %{
8367 __ movq($mem$$Address, r12);
8368 %}
8369 ins_pipe(ialu_mem_reg);
8370 %}
8371
8372 instruct storeSSI(stackSlotI dst, rRegI src)
8373 %{
8374 match(Set dst src);
8375
8376 ins_cost(100);
8377 format %{ "movl $dst, $src\t# int stk" %}
8378 ins_encode %{
8379 __ movl($dst$$Address, $src$$Register);
8380 %}
8381 ins_pipe( ialu_mem_reg );
8382 %}
8383
8384 instruct storeSSL(stackSlotL dst, rRegL src)
8385 %{
8386 match(Set dst src);
8387
8388 ins_cost(100);
8389 format %{ "movq $dst, $src\t# long stk" %}
8390 ins_encode %{
8391 __ movq($dst$$Address, $src$$Register);
8392 %}
8393 ins_pipe(ialu_mem_reg);
8394 %}
8395
8396 instruct storeSSP(stackSlotP dst, rRegP src)
8397 %{
8398 match(Set dst src);
8399
8400 ins_cost(100);
8401 format %{ "movq $dst, $src\t# ptr stk" %}
8402 ins_encode %{
8403 __ movq($dst$$Address, $src$$Register);
8404 %}
8405 ins_pipe(ialu_mem_reg);
8406 %}
8407
8408 instruct storeSSF(stackSlotF dst, regF src)
8409 %{
8410 match(Set dst src);
8411
8412 ins_cost(95); // XXX
8413 format %{ "movss $dst, $src\t# float stk" %}
8414 ins_encode %{
8415 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8416 %}
8417 ins_pipe(pipe_slow); // XXX
8418 %}
8419
8420 instruct storeSSD(stackSlotD dst, regD src)
8421 %{
8422 match(Set dst src);
8423
8424 ins_cost(95); // XXX
8425 format %{ "movsd $dst, $src\t# double stk" %}
8426 ins_encode %{
8427 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8428 %}
8429 ins_pipe(pipe_slow); // XXX
8430 %}
8431
8432 instruct cacheWB(indirect addr)
8433 %{
8434 predicate(VM_Version::supports_data_cache_line_flush());
8435 match(CacheWB addr);
8436
8437 ins_cost(100);
  format %{ "cache wb $addr" %}
8439 ins_encode %{
8440 assert($addr->index_position() < 0, "should be");
8441 assert($addr$$disp == 0, "should be");
8442 __ cache_wb(Address($addr$$base$$Register, 0));
8443 %}
8444 ins_pipe(pipe_slow); // XXX
8445 %}
8446
8447 instruct cacheWBPreSync()
8448 %{
8449 predicate(VM_Version::supports_data_cache_line_flush());
8450 match(CacheWBPreSync);
8451
8452 ins_cost(100);
  format %{ "cache wb presync" %}
8454 ins_encode %{
8455 __ cache_wbsync(true);
8456 %}
8457 ins_pipe(pipe_slow); // XXX
8458 %}
8459
8460 instruct cacheWBPostSync()
8461 %{
8462 predicate(VM_Version::supports_data_cache_line_flush());
8463 match(CacheWBPostSync);
8464
8465 ins_cost(100);
  format %{ "cache wb postsync" %}
8467 ins_encode %{
8468 __ cache_wbsync(false);
8469 %}
8470 ins_pipe(pipe_slow); // XXX
8471 %}
8472
8473 //----------BSWAP Instructions-------------------------------------------------
8474 instruct bytes_reverse_int(rRegI dst) %{
8475 match(Set dst (ReverseBytesI dst));
8476
8477 format %{ "bswapl $dst" %}
8478 ins_encode %{
8479 __ bswapl($dst$$Register);
8480 %}
8481 ins_pipe( ialu_reg );
8482 %}
8483
8484 instruct bytes_reverse_long(rRegL dst) %{
8485 match(Set dst (ReverseBytesL dst));
8486
8487 format %{ "bswapq $dst" %}
8488 ins_encode %{
8489 __ bswapq($dst$$Register);
8490 %}
  ins_pipe(ialu_reg);
8492 %}
8493
8494 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8495 match(Set dst (ReverseBytesUS dst));
8496 effect(KILL cr);
8497
8498 format %{ "bswapl $dst\n\t"
            "shrl $dst, 16" %}
8500 ins_encode %{
8501 __ bswapl($dst$$Register);
8502 __ shrl($dst$$Register, 16);
8503 %}
8504 ins_pipe( ialu_reg );
8505 %}
8506
8507 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8508 match(Set dst (ReverseBytesS dst));
8509 effect(KILL cr);
8510
8511 format %{ "bswapl $dst\n\t"
            "sarl $dst, 16" %}
8513 ins_encode %{
8514 __ bswapl($dst$$Register);
8515 __ sarl($dst$$Register, 16);
8516 %}
8517 ins_pipe( ialu_reg );
8518 %}
8519
8520 //---------- Zeros Count Instructions ------------------------------------------
8521
8522 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8523 predicate(UseCountLeadingZerosInstruction);
8524 match(Set dst (CountLeadingZerosI src));
8525 effect(KILL cr);
8526
8527 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8528 ins_encode %{
8529 __ lzcntl($dst$$Register, $src$$Register);
8530 %}
8531 ins_pipe(ialu_reg);
8532 %}
8533
8534 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8535 predicate(UseCountLeadingZerosInstruction);
8536 match(Set dst (CountLeadingZerosI (LoadI src)));
8537 effect(KILL cr);
8538 ins_cost(175);
8539 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8540 ins_encode %{
8541 __ lzcntl($dst$$Register, $src$$Address);
8542 %}
8543 ins_pipe(ialu_reg_mem);
8544 %}
8545
8546 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8547 predicate(!UseCountLeadingZerosInstruction);
8548 match(Set dst (CountLeadingZerosI src));
8549 effect(KILL cr);
8550
8551 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8552 "jnz skip\n\t"
8553 "movl $dst, -1\n"
8554 "skip:\n\t"
8555 "negl $dst\n\t"
8556 "addl $dst, 31" %}
8557 ins_encode %{
8558 Register Rdst = $dst$$Register;
8559 Register Rsrc = $src$$Register;
8560 Label skip;
8561 __ bsrl(Rdst, Rsrc);
8562 __ jccb(Assembler::notZero, skip);
8563 __ movl(Rdst, -1);
8564 __ bind(skip);
8565 __ negl(Rdst);
8566 __ addl(Rdst, BitsPerInt - 1);
8567 %}
8568 ins_pipe(ialu_reg);
8569 %}
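
// Worked example for the bsr fallback above: for src != 0, bsrl yields the
// index i of the highest set bit, and neg/add turns it into 31 - i, the
// leading-zero count. For src == 0, dst is forced to -1, giving
// -(-1) + 31 == 32, which matches Integer.numberOfLeadingZeros(0).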
8570
8571 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8572 predicate(UseCountLeadingZerosInstruction);
8573 match(Set dst (CountLeadingZerosL src));
8574 effect(KILL cr);
8575
8576 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8577 ins_encode %{
8578 __ lzcntq($dst$$Register, $src$$Register);
8579 %}
8580 ins_pipe(ialu_reg);
8581 %}
8582
8583 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8584 predicate(UseCountLeadingZerosInstruction);
8585 match(Set dst (CountLeadingZerosL (LoadL src)));
8586 effect(KILL cr);
8587 ins_cost(175);
8588 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8589 ins_encode %{
8590 __ lzcntq($dst$$Register, $src$$Address);
8591 %}
8592 ins_pipe(ialu_reg_mem);
8593 %}
8594
8595 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8596 predicate(!UseCountLeadingZerosInstruction);
8597 match(Set dst (CountLeadingZerosL src));
8598 effect(KILL cr);
8599
8600 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8601 "jnz skip\n\t"
8602 "movl $dst, -1\n"
8603 "skip:\n\t"
8604 "negl $dst\n\t"
8605 "addl $dst, 63" %}
8606 ins_encode %{
8607 Register Rdst = $dst$$Register;
8608 Register Rsrc = $src$$Register;
8609 Label skip;
8610 __ bsrq(Rdst, Rsrc);
8611 __ jccb(Assembler::notZero, skip);
8612 __ movl(Rdst, -1);
8613 __ bind(skip);
8614 __ negl(Rdst);
8615 __ addl(Rdst, BitsPerLong - 1);
8616 %}
8617 ins_pipe(ialu_reg);
8618 %}
8619
8620 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8621 predicate(UseCountTrailingZerosInstruction);
8622 match(Set dst (CountTrailingZerosI src));
8623 effect(KILL cr);
8624
8625 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8626 ins_encode %{
8627 __ tzcntl($dst$$Register, $src$$Register);
8628 %}
8629 ins_pipe(ialu_reg);
8630 %}
8631
8632 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8633 predicate(UseCountTrailingZerosInstruction);
8634 match(Set dst (CountTrailingZerosI (LoadI src)));
8635 effect(KILL cr);
8636 ins_cost(175);
8637 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8638 ins_encode %{
8639 __ tzcntl($dst$$Register, $src$$Address);
8640 %}
8641 ins_pipe(ialu_reg_mem);
8642 %}
8643
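// Fallback when tzcnt is unavailable. For a non-zero input bsf already
// returns the trailing-zero count (the index of the lowest set bit) and
// clears ZF, so the branch is taken and nothing more is needed. A zero
// input sets ZF and leaves the destination undefined, so the code falls
// through and loads the full width (32 here, 64 in the long variant).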
8644 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8645 predicate(!UseCountTrailingZerosInstruction);
8646 match(Set dst (CountTrailingZerosI src));
8647 effect(KILL cr);
8648
8649 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8650 "jnz done\n\t"
8651 "movl $dst, 32\n"
8652 "done:" %}
8653 ins_encode %{
8654 Register Rdst = $dst$$Register;
8655 Label done;
8656 __ bsfl(Rdst, $src$$Register);
8657 __ jccb(Assembler::notZero, done);
8658 __ movl(Rdst, BitsPerInt);
8659 __ bind(done);
8660 %}
8661 ins_pipe(ialu_reg);
8662 %}
8663
8664 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8665 predicate(UseCountTrailingZerosInstruction);
8666 match(Set dst (CountTrailingZerosL src));
8667 effect(KILL cr);
8668
8669 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8670 ins_encode %{
8671 __ tzcntq($dst$$Register, $src$$Register);
8672 %}
8673 ins_pipe(ialu_reg);
8674 %}
8675
8676 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8677 predicate(UseCountTrailingZerosInstruction);
8678 match(Set dst (CountTrailingZerosL (LoadL src)));
8679 effect(KILL cr);
8680 ins_cost(175);
8681 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8682 ins_encode %{
8683 __ tzcntq($dst$$Register, $src$$Address);
8684 %}
8685 ins_pipe(ialu_reg_mem);
8686 %}
8687
8688 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8689 predicate(!UseCountTrailingZerosInstruction);
8690 match(Set dst (CountTrailingZerosL src));
8691 effect(KILL cr);
8692
8693 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8694 "jnz done\n\t"
8695 "movl $dst, 64\n"
8696 "done:" %}
8697 ins_encode %{
8698 Register Rdst = $dst$$Register;
8699 Label done;
8700 __ bsfq(Rdst, $src$$Register);
8701 __ jccb(Assembler::notZero, done);
8702 __ movl(Rdst, BitsPerLong);
8703 __ bind(done);
8704 %}
8705 ins_pipe(ialu_reg);
8706 %}
8707
8708 //--------------- Reverse Operation Instructions ----------------
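// ReverseI/ReverseL reverse the bit order of the entire value. Broadly,
// the MacroAssembler helpers combine a byte swap with a per-byte bit
// reversal: on GFNI-capable CPUs the per-byte step can be done with a
// single gf2p8affineqb against a bit-reversal matrix (hence the XMM
// temporaries in the _gfni forms), while the fallback works purely in
// GPRs with shift-and-mask steps.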
8709 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8710 predicate(!VM_Version::supports_gfni());
8711 match(Set dst (ReverseI src));
8712 effect(TEMP dst, TEMP rtmp, KILL cr);
8713 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8714 ins_encode %{
8715 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8716 %}
8717 ins_pipe( ialu_reg );
8718 %}
8719
8720 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8721 predicate(VM_Version::supports_gfni());
8722 match(Set dst (ReverseI src));
8723 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8724 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8725 ins_encode %{
8726 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8727 %}
8728 ins_pipe( ialu_reg );
8729 %}
8730
8731 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8732 predicate(!VM_Version::supports_gfni());
8733 match(Set dst (ReverseL src));
8734 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8735 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8736 ins_encode %{
8737 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8738 %}
8739 ins_pipe( ialu_reg );
8740 %}
8741
8742 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8743 predicate(VM_Version::supports_gfni());
8744 match(Set dst (ReverseL src));
8745 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8746 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8747 ins_encode %{
8748 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8749 %}
8750 ins_pipe( ialu_reg );
8751 %}
8752
8753 //---------- Population Count Instructions -------------------------------------
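// popcnt is matched only when UsePopCountInstruction is set, which in turn
// requires the POPCNT CPU feature; there is no fallback pattern here, so
// without it the bitCount intrinsics are simply not used and the Java
// implementations run instead.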
8754
8755 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8756 predicate(UsePopCountInstruction);
8757 match(Set dst (PopCountI src));
8758 effect(KILL cr);
8759
8760 format %{ "popcnt $dst, $src" %}
8761 ins_encode %{
8762 __ popcntl($dst$$Register, $src$$Register);
8763 %}
8764 ins_pipe(ialu_reg);
8765 %}
8766
8767 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8768 predicate(UsePopCountInstruction);
8769 match(Set dst (PopCountI (LoadI mem)));
8770 effect(KILL cr);
8771
8772 format %{ "popcnt $dst, $mem" %}
8773 ins_encode %{
8774 __ popcntl($dst$$Register, $mem$$Address);
8775 %}
8776 ins_pipe(ialu_reg);
8777 %}
8778
8779 // Note: Long.bitCount(long) returns an int.
8780 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8781 predicate(UsePopCountInstruction);
8782 match(Set dst (PopCountL src));
8783 effect(KILL cr);
8784
8785 format %{ "popcnt $dst, $src" %}
8786 ins_encode %{
8787 __ popcntq($dst$$Register, $src$$Register);
8788 %}
8789 ins_pipe(ialu_reg);
8790 %}
8791
8792 // Note: Long.bitCount(long) returns an int.
8793 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8794 predicate(UsePopCountInstruction);
8795 match(Set dst (PopCountL (LoadL mem)));
8796 effect(KILL cr);
8797
8798 format %{ "popcnt $dst, $mem" %}
8799 ins_encode %{
8800 __ popcntq($dst$$Register, $mem$$Address);
8801 %}
8802 ins_pipe(ialu_reg);
8803 %}
8804
8805
8806 //----------MemBar Instructions-----------------------------------------------
8807 // Memory barrier flavors
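//
// x86 is strongly ordered (TSO): loads are not reordered with other loads,
// stores are not reordered with other stores, and stores are not reordered
// with earlier loads. Acquire, release and storestore barriers therefore
// need no code at all; the empty patterns below exist only to keep the
// compiler from reordering across them. The one ordering x86 does not give
// for free is StoreLoad, so only membar_volatile emits an instruction, a
// locked add of zero to the top of the stack, which serializes like mfence
// but is cheaper on most processors.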
8808
8809 instruct membar_acquire()
8810 %{
8811 match(MemBarAcquire);
8812 match(LoadFence);
8813 ins_cost(0);
8814
8815 size(0);
8816 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8817 ins_encode();
8818 ins_pipe(empty);
8819 %}
8820
8821 instruct membar_acquire_lock()
8822 %{
8823 match(MemBarAcquireLock);
8824 ins_cost(0);
8825
8826 size(0);
8827 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8828 ins_encode();
8829 ins_pipe(empty);
8830 %}
8831
8832 instruct membar_release()
8833 %{
8834 match(MemBarRelease);
8835 match(StoreFence);
8836 ins_cost(0);
8837
8838 size(0);
8839 format %{ "MEMBAR-release ! (empty encoding)" %}
8840 ins_encode();
8841 ins_pipe(empty);
8842 %}
8843
8844 instruct membar_release_lock()
8845 %{
8846 match(MemBarReleaseLock);
8847 ins_cost(0);
8848
8849 size(0);
8850 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8851 ins_encode();
8852 ins_pipe(empty);
8853 %}
8854
8855 instruct membar_volatile(rFlagsReg cr) %{
8856 match(MemBarVolatile);
8857 effect(KILL cr);
8858 ins_cost(400);
8859
8860 format %{
8861 $$template
8862 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8863 %}
8864 ins_encode %{
8865 __ membar(Assembler::StoreLoad);
8866 %}
8867 ins_pipe(pipe_slow);
8868 %}
8869
8870 instruct unnecessary_membar_volatile()
8871 %{
8872 match(MemBarVolatile);
8873 predicate(Matcher::post_store_load_barrier(n));
8874 ins_cost(0);
8875
8876 size(0);
8877 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8878 ins_encode();
8879 ins_pipe(empty);
8880 %}
8881
8882 instruct membar_storestore() %{
8883 match(MemBarStoreStore);
8884 match(StoreStoreFence);
8885 ins_cost(0);
8886
8887 size(0);
8888 format %{ "MEMBAR-storestore (empty encoding)" %}
8889 ins_encode( );
8890 ins_pipe(empty);
8891 %}
8892
8893 //----------Move Instructions--------------------------------------------------
8894
8895 instruct castX2P(rRegP dst, rRegL src)
8896 %{
8897 match(Set dst (CastX2P src));
8898
8899 format %{ "movq $dst, $src\t# long->ptr" %}
8900 ins_encode %{
8901 if ($dst$$reg != $src$$reg) {
8902 __ movptr($dst$$Register, $src$$Register);
8903 }
8904 %}
8905 ins_pipe(ialu_reg_reg); // XXX
8906 %}
8907
8908 instruct castP2X(rRegL dst, rRegP src)
8909 %{
8910 match(Set dst (CastP2X src));
8911
8912 format %{ "movq $dst, $src\t# ptr -> long" %}
8913 ins_encode %{
8914 if ($dst$$reg != $src$$reg) {
8915 __ movptr($dst$$Register, $src$$Register);
8916 }
8917 %}
8918 ins_pipe(ialu_reg_reg); // XXX
8919 %}
8920
8921 // Convert oop into int for vector alignment masking
8922 instruct convP2I(rRegI dst, rRegP src)
8923 %{
8924 match(Set dst (ConvL2I (CastP2X src)));
8925
8926 format %{ "movl $dst, $src\t# ptr -> int" %}
8927 ins_encode %{
8928 __ movl($dst$$Register, $src$$Register);
8929 %}
8930 ins_pipe(ialu_reg_reg); // XXX
8931 %}
8932
8933 // Convert compressed oop into int for vector alignment masking
8934 // when oops are 32 bits wide (heap < 4GB).
8935 instruct convN2I(rRegI dst, rRegN src)
8936 %{
8937 predicate(CompressedOops::shift() == 0);
8938 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8939
8940 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8941 ins_encode %{
8942 __ movl($dst$$Register, $src$$Register);
8943 %}
8944 ins_pipe(ialu_reg_reg); // XXX
8945 %}
8946
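// With compressed oops a 64-bit heap pointer is stored as a 32-bit value:
// encoding subtracts the heap base and shifts right by the object
// alignment shift, and decoding reverses this. When the heap fits in the
// low 4GB the base is zero (and the shift may be zero as well), so
// encode/decode can degenerate to a plain 32-bit move, as in convN2I above.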
8947 // Convert oop pointer into compressed form
8948 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8949 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8950 match(Set dst (EncodeP src));
8951 effect(KILL cr);
8952 format %{ "encode_heap_oop $dst,$src" %}
8953 ins_encode %{
8954 Register s = $src$$Register;
8955 Register d = $dst$$Register;
8956 if (s != d) {
8957 __ movq(d, s);
8958 }
8959 __ encode_heap_oop(d);
8960 %}
8961 ins_pipe(ialu_reg_long);
8962 %}
8963
8964 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8965 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8966 match(Set dst (EncodeP src));
8967 effect(KILL cr);
8968 format %{ "encode_heap_oop_not_null $dst,$src" %}
8969 ins_encode %{
8970 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
8971 %}
8972 ins_pipe(ialu_reg_long);
8973 %}
8974
8975 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
8976 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
8977 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
8978 match(Set dst (DecodeN src));
8979 effect(KILL cr);
8980 format %{ "decode_heap_oop $dst,$src" %}
8981 ins_encode %{
8982 Register s = $src$$Register;
8983 Register d = $dst$$Register;
8984 if (s != d) {
8985 __ movq(d, s);
8986 }
8987 __ decode_heap_oop(d);
8988 %}
8989 ins_pipe(ialu_reg_long);
8990 %}
8991
8992 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
8993 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
8994 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
8995 match(Set dst (DecodeN src));
8996 effect(KILL cr);
8997 format %{ "decode_heap_oop_not_null $dst,$src" %}
8998 ins_encode %{
8999 Register s = $src$$Register;
9000 Register d = $dst$$Register;
9001 if (s != d) {
9002 __ decode_heap_oop_not_null(d, s);
9003 } else {
9004 __ decode_heap_oop_not_null(d);
9005 }
9006 %}
9007 ins_pipe(ialu_reg_long);
9008 %}
9009
9010 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9011 match(Set dst (EncodePKlass src));
9012 effect(TEMP dst, KILL cr);
9013 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9014 ins_encode %{
9015 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9016 %}
9017 ins_pipe(ialu_reg_long);
9018 %}
9019
9020 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9021 match(Set dst (DecodeNKlass src));
9022 effect(TEMP dst, KILL cr);
9023 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9024 ins_encode %{
9025 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9026 %}
9027 ins_pipe(ialu_reg_long);
9028 %}
9029
9030 //----------Conditional Move---------------------------------------------------
9031 // Jump
9032 // dummy instruction for generating temp registers
9033 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9034 match(Jump (LShiftL switch_val shift));
9035 ins_cost(350);
9036 predicate(false);
9037 effect(TEMP dest);
9038
9039 format %{ "leaq $dest, [$constantaddress]\n\t"
9040 "jmp [$dest + $switch_val << $shift]\n\t" %}
9041 ins_encode %{
9042 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9043 // to do that and the compiler is using that register as one it can allocate.
9044 // So we build it all by hand.
9045 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9046 // ArrayAddress dispatch(table, index);
9047 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9048 __ lea($dest$$Register, $constantaddress);
9049 __ jmp(dispatch);
9050 %}
9051 ins_pipe(pipe_jmp);
9052 %}
9053
9054 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9055 match(Jump (AddL (LShiftL switch_val shift) offset));
9056 ins_cost(350);
9057 effect(TEMP dest);
9058
9059 format %{ "leaq $dest, [$constantaddress]\n\t"
9060 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9061 ins_encode %{
9062 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9063 // to do that and the compiler is using that register as one it can allocate.
9064 // So we build it all by hand.
9065 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9066 // ArrayAddress dispatch(table, index);
9067 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9068 __ lea($dest$$Register, $constantaddress);
9069 __ jmp(dispatch);
9070 %}
9071 ins_pipe(pipe_jmp);
9072 %}
9073
9074 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9075 match(Jump switch_val);
9076 ins_cost(350);
9077 effect(TEMP dest);
9078
9079 format %{ "leaq $dest, [$constantaddress]\n\t"
9080 "jmp [$dest + $switch_val]\n\t" %}
9081 ins_encode %{
9082 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9083 // to do that and the compiler is using that register as one it can allocate.
9084 // So we build it all by hand.
9085 // Address index(noreg, switch_reg, Address::times_1);
9086 // ArrayAddress dispatch(table, index);
9087 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9088 __ lea($dest$$Register, $constantaddress);
9089 __ jmp(dispatch);
9090 %}
9091 ins_pipe(pipe_jmp);
9092 %}
9093
9094 // Conditional move
9095 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9096 %{
9097 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9098 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9099
9100 ins_cost(100); // XXX
9101 format %{ "setbn$cop $dst\t# signed, int" %}
9102 ins_encode %{
9103 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9104 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9105 %}
9106 ins_pipe(ialu_reg);
9107 %}
9108
9109 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9110 %{
9111 predicate(!UseAPX);
9112 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9113
9114 ins_cost(200); // XXX
9115 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9116 ins_encode %{
9117 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9118 %}
9119 ins_pipe(pipe_cmov_reg);
9120 %}
9121
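// The _ndd patterns use the Intel APX "new data destination" encodings:
// EVEX-extended forms of the legacy instructions that take a separate
// destination register, so the match does not have to read and write the
// same register. The ecmovl/ecmovq calls emit those encodings and are
// selected only when UseAPX is enabled.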
9122 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9123 %{
9124 predicate(UseAPX);
9125 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9126
9127 ins_cost(200);
9128 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9129 ins_encode %{
9130 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9131 %}
9132 ins_pipe(pipe_cmov_reg);
9133 %}
9134
9135 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9136 %{
9137 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9138 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9139
9140 ins_cost(100); // XXX
9141 format %{ "setbn$cop $dst\t# unsigned, int" %}
9142 ins_encode %{
9143 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9144 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9145 %}
9146 ins_pipe(ialu_reg);
9147 %}
9148
9149 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9150 predicate(!UseAPX);
9151 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9152
9153 ins_cost(200); // XXX
9154 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9155 ins_encode %{
9156 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9157 %}
9158 ins_pipe(pipe_cmov_reg);
9159 %}
9160
9161 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9162 predicate(UseAPX);
9163 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9164
9165 ins_cost(200);
9166 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9167 ins_encode %{
9168 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9169 %}
9170 ins_pipe(pipe_cmov_reg);
9171 %}
9172
9173 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9174 %{
9175 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9176 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9177
9178 ins_cost(100); // XXX
9179 format %{ "setbn$cop $dst\t# unsigned, int" %}
9180 ins_encode %{
9181 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9182 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9183 %}
9184 ins_pipe(ialu_reg);
9185 %}
9186
9187 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9188 %{
9189 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9190 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9191
9192 ins_cost(100); // XXX
9193 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9194 ins_encode %{
9195 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9196 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9197 %}
9198 ins_pipe(ialu_reg);
9199 %}
9200
9201 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9202 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9203
9204 ins_cost(200);
9205 expand %{
9206 cmovI_regU(cop, cr, dst, src);
9207 %}
9208 %}
9209
9210 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9211 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9212
9213 ins_cost(200);
9214 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9215 ins_encode %{
9216 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9217 %}
9218 ins_pipe(pipe_cmov_reg);
9219 %}
9220
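// After an unordered floating point compare, NaN operands set ZF, PF and
// CF simultaneously, so a bare cmovne would skip the move for unordered
// inputs even though Java's != must be true when either input is NaN. The
// two-instruction forms below therefore move on parity first (the
// unordered case) and then on not-equal.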
9221 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9222 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9223 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9224
9225 ins_cost(200); // XXX
9226 format %{ "cmovpl $dst, $src\n\t"
9227 "cmovnel $dst, $src" %}
9228 ins_encode %{
9229 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9230 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9231 %}
9232 ins_pipe(pipe_cmov_reg);
9233 %}
9234
9235 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9236 // inputs of the CMove
9237 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9238 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9239 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9240 effect(TEMP dst);
9241
9242 ins_cost(200); // XXX
9243 format %{ "cmovpl $dst, $src\n\t"
9244 "cmovnel $dst, $src" %}
9245 ins_encode %{
9246 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9247 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9248 %}
9249 ins_pipe(pipe_cmov_reg);
9250 %}
9251
9252 // Conditional move
9253 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9254 predicate(!UseAPX);
9255 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9256
9257 ins_cost(250); // XXX
9258 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9259 ins_encode %{
9260 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9261 %}
9262 ins_pipe(pipe_cmov_mem);
9263 %}
9264
9265 // Conditional move
9266 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9267 %{
9268 predicate(UseAPX);
9269 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9270
9271 ins_cost(250);
9272 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9273 ins_encode %{
9274 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9275 %}
9276 ins_pipe(pipe_cmov_mem);
9277 %}
9278
9279 // Conditional move
9280 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9281 %{
9282 predicate(!UseAPX);
9283 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9284
9285 ins_cost(250); // XXX
9286 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9287 ins_encode %{
9288 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9289 %}
9290 ins_pipe(pipe_cmov_mem);
9291 %}
9292
9293 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9294 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9295
9296 ins_cost(250);
9297 expand %{
9298 cmovI_memU(cop, cr, dst, src);
9299 %}
9300 %}
9301
9302 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9303 %{
9304 predicate(UseAPX);
9305 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9306
9307 ins_cost(250);
9308 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9309 ins_encode %{
9310 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9311 %}
9312 ins_pipe(pipe_cmov_mem);
9313 %}
9314
9315 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9316 %{
9317 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9318
9319 ins_cost(250);
9320 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9321 ins_encode %{
9322 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9323 %}
9324 ins_pipe(pipe_cmov_mem);
9325 %}
9326
9327 // Conditional move
9328 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9329 %{
9330 predicate(!UseAPX);
9331 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9332
9333 ins_cost(200); // XXX
9334 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9335 ins_encode %{
9336 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9337 %}
9338 ins_pipe(pipe_cmov_reg);
9339 %}
9340
9341 // Conditional move ndd
9342 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9343 %{
9344 predicate(UseAPX);
9345 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9346
9347 ins_cost(200);
9348 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9349 ins_encode %{
9350 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9351 %}
9352 ins_pipe(pipe_cmov_reg);
9353 %}
9354
9355 // Conditional move
9356 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9357 %{
9358 predicate(!UseAPX);
9359 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9360
9361 ins_cost(200); // XXX
9362 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9363 ins_encode %{
9364 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9365 %}
9366 ins_pipe(pipe_cmov_reg);
9367 %}
9368
9369 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9370 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9371
9372 ins_cost(200);
9373 expand %{
9374 cmovN_regU(cop, cr, dst, src);
9375 %}
9376 %}
9377
9378 // Conditional move ndd
9379 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9380 %{
9381 predicate(UseAPX);
9382 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9383
9384 ins_cost(200);
9385 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9386 ins_encode %{
9387 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9388 %}
9389 ins_pipe(pipe_cmov_reg);
9390 %}
9391
9392 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9393 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9394
9395 ins_cost(200);
9396 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9397 ins_encode %{
9398 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9399 %}
9400 ins_pipe(pipe_cmov_reg);
9401 %}
9402
9403 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9404 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9405 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9406
9407 ins_cost(200); // XXX
9408 format %{ "cmovpl $dst, $src\n\t"
9409 "cmovnel $dst, $src" %}
9410 ins_encode %{
9411 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9412 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9413 %}
9414 ins_pipe(pipe_cmov_reg);
9415 %}
9416
9417 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9418 // inputs of the CMove
9419 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9420 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9421 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9422
9423 ins_cost(200); // XXX
9424 format %{ "cmovpl $dst, $src\n\t"
9425 "cmovnel $dst, $src" %}
9426 ins_encode %{
9427 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9428 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9429 %}
9430 ins_pipe(pipe_cmov_reg);
9431 %}
9432
9433 // Conditional move
9434 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9435 %{
9436 predicate(!UseAPX);
9437 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9438
9439 ins_cost(200); // XXX
9440 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9441 ins_encode %{
9442 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9443 %}
9444 ins_pipe(pipe_cmov_reg); // XXX
9445 %}
9446
9447 // Conditional move ndd
9448 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9449 %{
9450 predicate(UseAPX);
9451 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9452
9453 ins_cost(200);
9454 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9455 ins_encode %{
9456 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9457 %}
9458 ins_pipe(pipe_cmov_reg);
9459 %}
9460
9461 // Conditional move
9462 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9463 %{
9464 predicate(!UseAPX);
9465 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9466
9467 ins_cost(200); // XXX
9468 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9469 ins_encode %{
9470 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9471 %}
9472 ins_pipe(pipe_cmov_reg); // XXX
9473 %}
9474
9475 // Conditional move ndd
9476 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9477 %{
9478 predicate(UseAPX);
9479 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9480
9481 ins_cost(200);
9482 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9483 ins_encode %{
9484 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9485 %}
9486 ins_pipe(pipe_cmov_reg);
9487 %}
9488
9489 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9490 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9491
9492 ins_cost(200);
9493 expand %{
9494 cmovP_regU(cop, cr, dst, src);
9495 %}
9496 %}
9497
9498 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9499 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9500
9501 ins_cost(200);
9502 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9503 ins_encode %{
9504 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9505 %}
9506 ins_pipe(pipe_cmov_reg);
9507 %}
9508
9509 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9510 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9511 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9512
9513 ins_cost(200); // XXX
9514 format %{ "cmovpq $dst, $src\n\t"
9515 "cmovneq $dst, $src" %}
9516 ins_encode %{
9517 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9518 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9519 %}
9520 ins_pipe(pipe_cmov_reg);
9521 %}
9522
9523 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9524 // inputs of the CMove
9525 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9526 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9527 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9528
9529 ins_cost(200); // XXX
9530 format %{ "cmovpq $dst, $src\n\t"
9531 "cmovneq $dst, $src" %}
9532 ins_encode %{
9533 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9534 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9535 %}
9536 ins_pipe(pipe_cmov_reg);
9537 %}
9538
9539 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9540 %{
9541 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9542 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9543
9544 ins_cost(100); // XXX
9545 format %{ "setbn$cop $dst\t# signed, long" %}
9546 ins_encode %{
9547 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9548 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9549 %}
9550 ins_pipe(ialu_reg);
9551 %}
9552
9553 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9554 %{
9555 predicate(!UseAPX);
9556 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9557
9558 ins_cost(200); // XXX
9559 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9560 ins_encode %{
9561 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9562 %}
9563 ins_pipe(pipe_cmov_reg); // XXX
9564 %}
9565
9566 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9567 %{
9568 predicate(UseAPX);
9569 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9570
9571 ins_cost(200);
9572 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9573 ins_encode %{
9574 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9575 %}
9576 ins_pipe(pipe_cmov_reg);
9577 %}
9578
9579 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9580 %{
9581 predicate(!UseAPX);
9582 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9583
9584 ins_cost(200); // XXX
9585 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9586 ins_encode %{
9587 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9588 %}
9589 ins_pipe(pipe_cmov_mem); // XXX
9590 %}
9591
9592 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9593 %{
9594 predicate(UseAPX);
9595 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9596
9597 ins_cost(200);
9598 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9599 ins_encode %{
9600 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9601 %}
9602 ins_pipe(pipe_cmov_mem);
9603 %}
9604
9605 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9606 %{
9607 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9608 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9609
9610 ins_cost(100); // XXX
9611 format %{ "setbn$cop $dst\t# unsigned, long" %}
9612 ins_encode %{
9613 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9614 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9615 %}
9616 ins_pipe(ialu_reg);
9617 %}
9618
9619 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9620 %{
9621 predicate(!UseAPX);
9622 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9623
9624 ins_cost(200); // XXX
9625 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9626 ins_encode %{
9627 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9628 %}
9629 ins_pipe(pipe_cmov_reg); // XXX
9630 %}
9631
9632 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9633 %{
9634 predicate(UseAPX);
9635 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9636
9637 ins_cost(200);
9638 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9639 ins_encode %{
9640 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9641 %}
9642 ins_pipe(pipe_cmov_reg);
9643 %}
9644
9645 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9646 %{
9647 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9648 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9649
9650 ins_cost(100); // XXX
9651 format %{ "setbn$cop $dst\t# unsigned, long" %}
9652 ins_encode %{
9653 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9654 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9655 %}
9656 ins_pipe(ialu_reg);
9657 %}
9658
9659 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9660 %{
9661 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9662 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9663
9664 ins_cost(100); // XXX
9665 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9666 ins_encode %{
9667 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9668 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9669 %}
9670 ins_pipe(ialu_reg);
9671 %}
9672
9673 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9674 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9675
9676 ins_cost(200);
9677 expand %{
9678 cmovL_regU(cop, cr, dst, src);
9679 %}
9680 %}
9681
9682 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9683 %{
9684 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9685
9686 ins_cost(200);
9687 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9688 ins_encode %{
9689 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9690 %}
9691 ins_pipe(pipe_cmov_reg);
9692 %}
9693
9694 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9695 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9696 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9697
9698 ins_cost(200); // XXX
9699 format %{ "cmovpq $dst, $src\n\t"
9700 "cmovneq $dst, $src" %}
9701 ins_encode %{
9702 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9703 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9704 %}
9705 ins_pipe(pipe_cmov_reg);
9706 %}
9707
9708 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9709 // inputs of the CMove
9710 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9711 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9712 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9713
9714 ins_cost(200); // XXX
9715 format %{ "cmovpq $dst, $src\n\t"
9716 "cmovneq $dst, $src" %}
9717 ins_encode %{
9718 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9719 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9720 %}
9721 ins_pipe(pipe_cmov_reg);
9722 %}
9723
9724 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9725 %{
9726 predicate(!UseAPX);
9727 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9728
9729 ins_cost(200); // XXX
9730 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9731 ins_encode %{
9732 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9733 %}
9734 ins_pipe(pipe_cmov_mem); // XXX
9735 %}
9736
9737 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9738 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9739
9740 ins_cost(200);
9741 expand %{
9742 cmovL_memU(cop, cr, dst, src);
9743 %}
9744 %}
9745
9746 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9747 %{
9748 predicate(UseAPX);
9749 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9750
9751 ins_cost(200);
9752 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9753 ins_encode %{
9754 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9755 %}
9756 ins_pipe(pipe_cmov_mem);
9757 %}
9758
9759 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9760 %{
9761 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9762
9763 ins_cost(200);
9764 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9765 ins_encode %{
9766 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9767 %}
9768 ins_pipe(pipe_cmov_mem);
9769 %}
9770
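// There is no flag-based conditional move for XMM registers, so float and
// double CMoves are implemented as a short branch on the inverted
// condition around a register-to-register move.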
9771 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9772 %{
9773 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9774
9775 ins_cost(200); // XXX
9776 format %{ "jn$cop skip\t# signed cmove float\n\t"
9777 "movss $dst, $src\n"
9778 "skip:" %}
9779 ins_encode %{
9780 Label Lskip;
9781 // Invert sense of branch from sense of CMOV
9782 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9783 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9784 __ bind(Lskip);
9785 %}
9786 ins_pipe(pipe_slow);
9787 %}
9788
9789 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9790 %{
9791 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9792
9793 ins_cost(200); // XXX
9794 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9795 "movss $dst, $src\n"
9796 "skip:" %}
9797 ins_encode %{
9798 Label Lskip;
9799 // Invert sense of branch from sense of CMOV
9800 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9801 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9802 __ bind(Lskip);
9803 %}
9804 ins_pipe(pipe_slow);
9805 %}
9806
9807 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9808 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9809
9810 ins_cost(200);
9811 expand %{
9812 cmovF_regU(cop, cr, dst, src);
9813 %}
9814 %}
9815
9816 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9817 %{
9818 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9819
9820 ins_cost(200); // XXX
9821 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9822 "movss $dst, $src\n"
9823 "skip:" %}
9824 ins_encode %{
9825 Label Lskip;
9826 // Invert sense of branch from sense of CMOV
9827 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9828 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9829 __ bind(Lskip);
9830 %}
9831 ins_pipe(pipe_slow);
9832 %}
9833
9834 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9835 %{
9836 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9837
9838 ins_cost(200); // XXX
9839 format %{ "jn$cop skip\t# signed cmove double\n\t"
9840 "movsd $dst, $src\n"
9841 "skip:" %}
9842 ins_encode %{
9843 Label Lskip;
9844 // Invert sense of branch from sense of CMOV
9845 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9846 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9847 __ bind(Lskip);
9848 %}
9849 ins_pipe(pipe_slow);
9850 %}
9851
9852 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9853 %{
9854 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9855
9856 ins_cost(200); // XXX
9857 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9858 "movsd $dst, $src\n"
9859 "skip:" %}
9860 ins_encode %{
9861 Label Lskip;
9862 // Invert sense of branch from sense of CMOV
9863 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9864 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9865 __ bind(Lskip);
9866 %}
9867 ins_pipe(pipe_slow);
9868 %}
9869
9870 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9871 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9872
9873 ins_cost(200);
9874 expand %{
9875 cmovD_regU(cop, cr, dst, src);
9876 %}
9877 %}
9878
9879 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9880 %{
9881 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9882
9883 ins_cost(200); // XXX
9884 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9885 "movsd $dst, $src\n"
9886 "skip:" %}
9887 ins_encode %{
9888 Label Lskip;
9889 // Invert sense of branch from sense of CMOV
9890 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9891 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9892 __ bind(Lskip);
9893 %}
9894 ins_pipe(pipe_slow);
9895 %}
9896
9897 //----------Arithmetic Instructions--------------------------------------------
9898 //----------Addition Instructions----------------------------------------------
9899
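// The flag(PD::Flag_sets_*_flag) annotations on the arithmetic patterns
// record which EFLAGS bits the instruction defines. The additional
// Flag_ndd_demotable_opr* markers on the APX ndd patterns appear to
// indicate that the three-operand form may be demoted to the shorter
// legacy two-operand encoding when the allocator assigns the destination
// to that source operand.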
9900 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9901 %{
9902 predicate(!UseAPX);
9903 match(Set dst (AddI dst src));
9904 effect(KILL cr);
9905 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9906 format %{ "addl $dst, $src\t# int" %}
9907 ins_encode %{
9908 __ addl($dst$$Register, $src$$Register);
9909 %}
9910 ins_pipe(ialu_reg_reg);
9911 %}
9912
9913 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9914 %{
9915 predicate(UseAPX);
9916 match(Set dst (AddI src1 src2));
9917 effect(KILL cr);
9918 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9919
9920 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9921 ins_encode %{
9922 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9923 %}
9924 ins_pipe(ialu_reg_reg);
9925 %}
9926
9927 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9928 %{
9929 predicate(!UseAPX);
9930 match(Set dst (AddI dst src));
9931 effect(KILL cr);
9932 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9933
9934 format %{ "addl $dst, $src\t# int" %}
9935 ins_encode %{
9936 __ addl($dst$$Register, $src$$constant);
9937 %}
9938 ins_pipe( ialu_reg );
9939 %}
9940
9941 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9942 %{
9943 predicate(UseAPX);
9944 match(Set dst (AddI src1 src2));
9945 effect(KILL cr);
9946 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9947
9948 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9949 ins_encode %{
9950 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9951 %}
9952 ins_pipe( ialu_reg );
9953 %}
9954
9955 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9956 %{
9957 predicate(UseAPX);
9958 match(Set dst (AddI (LoadI src1) src2));
9959 effect(KILL cr);
9960 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9961
9962 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9963 ins_encode %{
9964 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9965 %}
9966 ins_pipe( ialu_reg );
9967 %}
9968
9969 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
9970 %{
9971 predicate(!UseAPX);
9972 match(Set dst (AddI dst (LoadI src)));
9973 effect(KILL cr);
9974 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9975
9976 ins_cost(150); // XXX
9977 format %{ "addl $dst, $src\t# int" %}
9978 ins_encode %{
9979 __ addl($dst$$Register, $src$$Address);
9980 %}
9981 ins_pipe(ialu_reg_mem);
9982 %}
9983
9984 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
9985 %{
9986 predicate(UseAPX);
9987 match(Set dst (AddI src1 (LoadI src2)));
9988 effect(KILL cr);
9989 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9990
9991 ins_cost(150);
9992 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9993 ins_encode %{
9994 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
9995 %}
9996 ins_pipe(ialu_reg_mem);
9997 %}
9998
9999 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10000 %{
10001 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10002 effect(KILL cr);
10003 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10004
10005 ins_cost(150); // XXX
10006 format %{ "addl $dst, $src\t# int" %}
10007 ins_encode %{
10008 __ addl($dst$$Address, $src$$Register);
10009 %}
10010 ins_pipe(ialu_mem_reg);
10011 %}
10012
10013 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10014 %{
10015 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10016 effect(KILL cr);
10017 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10018
10020 ins_cost(125); // XXX
10021 format %{ "addl $dst, $src\t# int" %}
10022 ins_encode %{
10023 __ addl($dst$$Address, $src$$constant);
10024 %}
10025 ins_pipe(ialu_mem_imm);
10026 %}
10027
10028 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10029 %{
10030 predicate(!UseAPX && UseIncDec);
10031 match(Set dst (AddI dst src));
10032 effect(KILL cr);
10033
10034 format %{ "incl $dst\t# int" %}
10035 ins_encode %{
10036 __ incrementl($dst$$Register);
10037 %}
10038 ins_pipe(ialu_reg);
10039 %}
10040
10041 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10042 %{
10043 predicate(UseAPX && UseIncDec);
10044 match(Set dst (AddI src val));
10045 effect(KILL cr);
10046 flag(PD::Flag_ndd_demotable_opr1);
10047
10048 format %{ "eincl $dst, $src\t# int ndd" %}
10049 ins_encode %{
10050 __ eincl($dst$$Register, $src$$Register, false);
10051 %}
10052 ins_pipe(ialu_reg);
10053 %}
10054
10055 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10056 %{
10057 predicate(UseAPX && UseIncDec);
10058 match(Set dst (AddI (LoadI src) val));
10059 effect(KILL cr);
10060
10061 format %{ "eincl $dst, $src\t# int ndd" %}
10062 ins_encode %{
10063 __ eincl($dst$$Register, $src$$Address, false);
10064 %}
10065 ins_pipe(ialu_reg);
10066 %}
10067
10068 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10069 %{
10070 predicate(UseIncDec);
10071 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10072 effect(KILL cr);
10073
10074 ins_cost(125); // XXX
10075 format %{ "incl $dst\t# int" %}
10076 ins_encode %{
10077 __ incrementl($dst$$Address);
10078 %}
10079 ins_pipe(ialu_mem_imm);
10080 %}
10081
10082 // XXX why does that use AddI
10083 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10084 %{
10085 predicate(!UseAPX && UseIncDec);
10086 match(Set dst (AddI dst src));
10087 effect(KILL cr);
10088
10089 format %{ "decl $dst\t# int" %}
10090 ins_encode %{
10091 __ decrementl($dst$$Register);
10092 %}
10093 ins_pipe(ialu_reg);
10094 %}
10095
10096 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10097 %{
10098 predicate(UseAPX && UseIncDec);
10099 match(Set dst (AddI src val));
10100 effect(KILL cr);
10101 flag(PD::Flag_ndd_demotable_opr1);
10102
10103 format %{ "edecl $dst, $src\t# int ndd" %}
10104 ins_encode %{
10105 __ edecl($dst$$Register, $src$$Register, false);
10106 %}
10107 ins_pipe(ialu_reg);
10108 %}
10109
10110 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10111 %{
10112 predicate(UseAPX && UseIncDec);
10113 match(Set dst (AddI (LoadI src) val));
10114 effect(KILL cr);
10115
10116 format %{ "edecl $dst, $src\t# int ndd" %}
10117 ins_encode %{
10118 __ edecl($dst$$Register, $src$$Address, false);
10119 %}
10120 ins_pipe(ialu_reg);
10121 %}
10122
10123 // XXX why does that use AddI
10124 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10125 %{
10126 predicate(UseIncDec);
10127 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10128 effect(KILL cr);
10129
10130 ins_cost(125); // XXX
10131 format %{ "decl $dst\t# int" %}
10132 ins_encode %{
10133 __ decrementl($dst$$Address);
10134 %}
10135 ins_pipe(ialu_mem_imm);
10136 %}
10137
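// The leal patterns below fold an add, and optionally a shift by 0-3,
// into a single address computation, e.g. base + (index << scale) + disp
// in one instruction with no flag side effects. They are gated on the
// VM_Version::supports_fast_*op_lea() predicates because lea forms with
// two or three components are only fast on some microarchitectures.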
10138 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10139 %{
10140 predicate(VM_Version::supports_fast_2op_lea());
10141 match(Set dst (AddI (LShiftI index scale) disp));
10142
10143 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10144 ins_encode %{
10145 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10146 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10147 %}
10148 ins_pipe(ialu_reg_reg);
10149 %}
10150
10151 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10152 %{
10153 predicate(VM_Version::supports_fast_3op_lea());
10154 match(Set dst (AddI (AddI base index) disp));
10155
10156 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10157 ins_encode %{
10158 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10159 %}
10160 ins_pipe(ialu_reg_reg);
10161 %}
10162
10163 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10164 %{
10165 predicate(VM_Version::supports_fast_2op_lea());
10166 match(Set dst (AddI base (LShiftI index scale)));
10167
10168 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10169 ins_encode %{
10170 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10171 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10172 %}
10173 ins_pipe(ialu_reg_reg);
10174 %}
10175
10176 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10177 %{
10178 predicate(VM_Version::supports_fast_3op_lea());
10179 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10180
10181 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10182 ins_encode %{
10183 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10184 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10185 %}
10186 ins_pipe(ialu_reg_reg);
10187 %}
10188
10189 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10190 %{
10191 predicate(!UseAPX);
10192 match(Set dst (AddL dst src));
10193 effect(KILL cr);
10194 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10195
10196 format %{ "addq $dst, $src\t# long" %}
10197 ins_encode %{
10198 __ addq($dst$$Register, $src$$Register);
10199 %}
10200 ins_pipe(ialu_reg_reg);
10201 %}
10202
10203 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10204 %{
10205 predicate(UseAPX);
10206 match(Set dst (AddL src1 src2));
10207 effect(KILL cr);
10208 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10209
10210 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10211 ins_encode %{
10212 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10213 %}
10214 ins_pipe(ialu_reg_reg);
10215 %}
10216
10217 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10218 %{
10219 predicate(!UseAPX);
10220 match(Set dst (AddL dst src));
10221 effect(KILL cr);
10222 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10223
10224 format %{ "addq $dst, $src\t# long" %}
10225 ins_encode %{
10226 __ addq($dst$$Register, $src$$constant);
10227 %}
10228 ins_pipe( ialu_reg );
10229 %}
10230
10231 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10232 %{
10233 predicate(UseAPX);
10234 match(Set dst (AddL src1 src2));
10235 effect(KILL cr);
10236 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10237
10238 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10239 ins_encode %{
10240 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10241 %}
10242 ins_pipe( ialu_reg );
10243 %}
10244
10245 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10246 %{
10247 predicate(UseAPX);
10248 match(Set dst (AddL (LoadL src1) src2));
10249 effect(KILL cr);
10250 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10251
10252 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10253 ins_encode %{
10254 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10255 %}
10256 ins_pipe( ialu_reg );
10257 %}
10258
10259 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10260 %{
10261 predicate(!UseAPX);
10262 match(Set dst (AddL dst (LoadL src)));
10263 effect(KILL cr);
10264 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10265
10266 ins_cost(150); // XXX
10267 format %{ "addq $dst, $src\t# long" %}
10268 ins_encode %{
10269 __ addq($dst$$Register, $src$$Address);
10270 %}
10271 ins_pipe(ialu_reg_mem);
10272 %}
10273
10274 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10275 %{
10276 predicate(UseAPX);
10277 match(Set dst (AddL src1 (LoadL src2)));
10278 effect(KILL cr);
10279 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10280
10281 ins_cost(150);
10282 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10283 ins_encode %{
10284 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10285 %}
10286 ins_pipe(ialu_reg_mem);
10287 %}
10288
10289 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10290 %{
10291 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10292 effect(KILL cr);
10293 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10294
10295 ins_cost(150); // XXX
10296 format %{ "addq $dst, $src\t# long" %}
10297 ins_encode %{
10298 __ addq($dst$$Address, $src$$Register);
10299 %}
10300 ins_pipe(ialu_mem_reg);
10301 %}
10302
10303 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10304 %{
10305 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10306 effect(KILL cr);
10307 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10308
10309 ins_cost(125); // XXX
10310 format %{ "addq $dst, $src\t# long" %}
10311 ins_encode %{
10312 __ addq($dst$$Address, $src$$constant);
10313 %}
10314 ins_pipe(ialu_mem_imm);
10315 %}
10316
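// incq/decq update OF/SF/ZF/AF/PF but leave CF untouched, which is why the
// inc/dec forms below are gated on UseIncDec and, unlike the add forms
// above, carry no flag() annotation.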
10317 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10318 %{
10319 predicate(!UseAPX && UseIncDec);
10320 match(Set dst (AddL dst src));
10321 effect(KILL cr);
10322
10323 format %{ "incq $dst\t# long" %}
10324 ins_encode %{
10325 __ incrementq($dst$$Register);
10326 %}
10327 ins_pipe(ialu_reg);
10328 %}
10329
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
10331 %{
10332 predicate(UseAPX && UseIncDec);
10333 match(Set dst (AddL src val));
10334 effect(KILL cr);
10335 flag(PD::Flag_ndd_demotable_opr1);
10336
10337 format %{ "eincq $dst, $src\t# long ndd" %}
10338 ins_encode %{
10339 __ eincq($dst$$Register, $src$$Register, false);
10340 %}
10341 ins_pipe(ialu_reg);
10342 %}
10343
10344 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10345 %{
10346 predicate(UseAPX && UseIncDec);
10347 match(Set dst (AddL (LoadL src) val));
10348 effect(KILL cr);
10349
10350 format %{ "eincq $dst, $src\t# long ndd" %}
10351 ins_encode %{
10352 __ eincq($dst$$Register, $src$$Address, false);
10353 %}
10354 ins_pipe(ialu_reg);
10355 %}
10356
10357 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10358 %{
10359 predicate(UseIncDec);
10360 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10361 effect(KILL cr);
10362
10363 ins_cost(125); // XXX
10364 format %{ "incq $dst\t# long" %}
10365 ins_encode %{
10366 __ incrementq($dst$$Address);
10367 %}
10368 ins_pipe(ialu_mem_imm);
10369 %}
10370
10371 // XXX why does that use AddL
10372 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10373 %{
10374 predicate(!UseAPX && UseIncDec);
10375 match(Set dst (AddL dst src));
10376 effect(KILL cr);
10377
10378 format %{ "decq $dst\t# long" %}
10379 ins_encode %{
10380 __ decrementq($dst$$Register);
10381 %}
10382 ins_pipe(ialu_reg);
10383 %}
10384
10385 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10386 %{
10387 predicate(UseAPX && UseIncDec);
10388 match(Set dst (AddL src val));
10389 effect(KILL cr);
10390 flag(PD::Flag_ndd_demotable_opr1);
10391
10392 format %{ "edecq $dst, $src\t# long ndd" %}
10393 ins_encode %{
10394 __ edecq($dst$$Register, $src$$Register, false);
10395 %}
10396 ins_pipe(ialu_reg);
10397 %}
10398
10399 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10400 %{
10401 predicate(UseAPX && UseIncDec);
10402 match(Set dst (AddL (LoadL src) val));
10403 effect(KILL cr);
10404
10405 format %{ "edecq $dst, $src\t# long ndd" %}
10406 ins_encode %{
10407 __ edecq($dst$$Register, $src$$Address, false);
10408 %}
10409 ins_pipe(ialu_reg);
10410 %}
10411
10412 // XXX why does that use AddL
10413 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10414 %{
10415 predicate(UseIncDec);
10416 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10417 effect(KILL cr);
10418
10419 ins_cost(125); // XXX
10420 format %{ "decq $dst\t# long" %}
10421 ins_encode %{
10422 __ decrementq($dst$$Address);
10423 %}
10424 ins_pipe(ialu_mem_imm);
10425 %}
10426
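// The leaq forms below fold shift-and-add arithmetic into one address
// computation, e.g. "leaq rax, [rbx + rcx*8 + 24]" yields
// rax = rbx + rcx*8 + 24 (registers illustrative). lea does not modify the
// flags, so no rFlagsReg effect is needed.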
10427 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10428 %{
10429 predicate(VM_Version::supports_fast_2op_lea());
10430 match(Set dst (AddL (LShiftL index scale) disp));
10431
10432 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10433 ins_encode %{
10434 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10435 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10436 %}
10437 ins_pipe(ialu_reg_reg);
10438 %}
10439
10440 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10441 %{
10442 predicate(VM_Version::supports_fast_3op_lea());
10443 match(Set dst (AddL (AddL base index) disp));
10444
10445 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10446 ins_encode %{
10447 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10448 %}
10449 ins_pipe(ialu_reg_reg);
10450 %}
10451
10452 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10453 %{
10454 predicate(VM_Version::supports_fast_2op_lea());
10455 match(Set dst (AddL base (LShiftL index scale)));
10456
10457 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10458 ins_encode %{
10459 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10460 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10461 %}
10462 ins_pipe(ialu_reg_reg);
10463 %}
10464
10465 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10466 %{
10467 predicate(VM_Version::supports_fast_3op_lea());
10468 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10469
10470 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10471 ins_encode %{
10472 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10473 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10474 %}
10475 ins_pipe(ialu_reg_reg);
10476 %}
10477
10478 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10479 %{
10480 match(Set dst (AddP dst src));
10481 effect(KILL cr);
10482 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10483
10484 format %{ "addq $dst, $src\t# ptr" %}
10485 ins_encode %{
10486 __ addq($dst$$Register, $src$$Register);
10487 %}
10488 ins_pipe(ialu_reg_reg);
10489 %}
10490
10491 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10492 %{
10493 match(Set dst (AddP dst src));
10494 effect(KILL cr);
10495 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10496
10497 format %{ "addq $dst, $src\t# ptr" %}
10498 ins_encode %{
10499 __ addq($dst$$Register, $src$$constant);
10500 %}
10501 ins_pipe( ialu_reg );
10502 %}
10503
10504 // XXX addP mem ops ????
10505
10506 instruct checkCastPP(rRegP dst)
10507 %{
10508 match(Set dst (CheckCastPP dst));
10509
10510 size(0);
10511 format %{ "# checkcastPP of $dst" %}
10512 ins_encode(/* empty encoding */);
10513 ins_pipe(empty);
10514 %}
10515
10516 instruct castPP(rRegP dst)
10517 %{
10518 match(Set dst (CastPP dst));
10519
10520 size(0);
10521 format %{ "# castPP of $dst" %}
10522 ins_encode(/* empty encoding */);
10523 ins_pipe(empty);
10524 %}
10525
10526 instruct castII(rRegI dst)
10527 %{
10528 predicate(VerifyConstraintCasts == 0);
10529 match(Set dst (CastII dst));
10530
10531 size(0);
10532 format %{ "# castII of $dst" %}
10533 ins_encode(/* empty encoding */);
10534 ins_cost(0);
10535 ins_pipe(empty);
10536 %}
10537
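// When VerifyConstraintCasts > 0, the normally zero-size constraint casts
// are replaced by the checked variants below, which emit a runtime check
// that the value really lies in the range recorded in the node's type.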
10538 instruct castII_checked(rRegI dst, rFlagsReg cr)
10539 %{
10540 predicate(VerifyConstraintCasts > 0);
10541 match(Set dst (CastII dst));
10542
10543 effect(KILL cr);
10544 format %{ "# cast_checked_II $dst" %}
10545 ins_encode %{
10546 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10547 %}
10548 ins_pipe(pipe_slow);
10549 %}
10550
10551 instruct castLL(rRegL dst)
10552 %{
10553 predicate(VerifyConstraintCasts == 0);
10554 match(Set dst (CastLL dst));
10555
10556 size(0);
10557 format %{ "# castLL of $dst" %}
10558 ins_encode(/* empty encoding */);
10559 ins_cost(0);
10560 ins_pipe(empty);
10561 %}
10562
10563 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10564 %{
10565 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10566 match(Set dst (CastLL dst));
10567
10568 effect(KILL cr);
10569 format %{ "# cast_checked_LL $dst" %}
10570 ins_encode %{
10571 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10572 %}
10573 ins_pipe(pipe_slow);
10574 %}
10575
10576 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10577 %{
10578 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10579 match(Set dst (CastLL dst));
10580
10581 effect(KILL cr, TEMP tmp);
10582 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10583 ins_encode %{
10584 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10585 %}
10586 ins_pipe(pipe_slow);
10587 %}
10588
10589 instruct castFF(regF dst)
10590 %{
10591 match(Set dst (CastFF dst));
10592
10593 size(0);
10594 format %{ "# castFF of $dst" %}
10595 ins_encode(/* empty encoding */);
10596 ins_cost(0);
10597 ins_pipe(empty);
10598 %}
10599
10600 instruct castHH(regF dst)
10601 %{
10602 match(Set dst (CastHH dst));
10603
10604 size(0);
10605 format %{ "# castHH of $dst" %}
10606 ins_encode(/* empty encoding */);
10607 ins_cost(0);
10608 ins_pipe(empty);
10609 %}
10610
10611 instruct castDD(regD dst)
10612 %{
10613 match(Set dst (CastDD dst));
10614
10615 size(0);
10616 format %{ "# castDD of $dst" %}
10617 ins_encode(/* empty encoding */);
10618 ins_cost(0);
10619 ins_pipe(empty);
10620 %}
10621
10622 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
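// All of these follow the usual x86 CAS idiom: oldval is pinned in rax,
// "lock cmpxchg" compares rax against memory and stores newval on a match,
// and ZF records the outcome, e.g. for the quadword case:
//   lock cmpxchgq rbx, [rdi]  # if ([rdi] == rax) { [rdi] = rbx; ZF = 1 }
//                             # else              { rax = [rdi]; ZF = 0 }
//   sete al; movzbl eax, al   # res = ZF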
10623 instruct compareAndSwapP(rRegI res,
10624 memory mem_ptr,
10625 rax_RegP oldval, rRegP newval,
10626 rFlagsReg cr)
10627 %{
10628 predicate(n->as_LoadStore()->barrier_data() == 0);
10629 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10630 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10631 effect(KILL cr, KILL oldval);
10632
10633 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10634 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10635 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10636 ins_encode %{
10637 __ lock();
10638 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10639 __ setcc(Assembler::equal, $res$$Register);
10640 %}
10641 ins_pipe( pipe_cmpxchg );
10642 %}
10643
10644 instruct compareAndSwapL(rRegI res,
10645 memory mem_ptr,
10646 rax_RegL oldval, rRegL newval,
10647 rFlagsReg cr)
10648 %{
10649 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10650 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10651 effect(KILL cr, KILL oldval);
10652
10653 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10654 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10655 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10656 ins_encode %{
10657 __ lock();
10658 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10659 __ setcc(Assembler::equal, $res$$Register);
10660 %}
10661 ins_pipe( pipe_cmpxchg );
10662 %}
10663
10664 instruct compareAndSwapI(rRegI res,
10665 memory mem_ptr,
10666 rax_RegI oldval, rRegI newval,
10667 rFlagsReg cr)
10668 %{
10669 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10670 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10671 effect(KILL cr, KILL oldval);
10672
10673 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10674 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10675 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10676 ins_encode %{
10677 __ lock();
10678 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10679 __ setcc(Assembler::equal, $res$$Register);
10680 %}
10681 ins_pipe( pipe_cmpxchg );
10682 %}
10683
10684 instruct compareAndSwapB(rRegI res,
10685 memory mem_ptr,
10686 rax_RegI oldval, rRegI newval,
10687 rFlagsReg cr)
10688 %{
10689 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10690 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10691 effect(KILL cr, KILL oldval);
10692
10693 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10694 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10695 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10696 ins_encode %{
10697 __ lock();
10698 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10699 __ setcc(Assembler::equal, $res$$Register);
10700 %}
10701 ins_pipe( pipe_cmpxchg );
10702 %}
10703
10704 instruct compareAndSwapS(rRegI res,
10705 memory mem_ptr,
10706 rax_RegI oldval, rRegI newval,
10707 rFlagsReg cr)
10708 %{
10709 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10710 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10711 effect(KILL cr, KILL oldval);
10712
10713 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10714 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10715 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10716 ins_encode %{
10717 __ lock();
10718 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10719 __ setcc(Assembler::equal, $res$$Register);
10720 %}
10721 ins_pipe( pipe_cmpxchg );
10722 %}
10723
10724 instruct compareAndSwapN(rRegI res,
10725 memory mem_ptr,
10726 rax_RegN oldval, rRegN newval,
10727 rFlagsReg cr) %{
10728 predicate(n->as_LoadStore()->barrier_data() == 0);
10729 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10730 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10731 effect(KILL cr, KILL oldval);
10732
10733 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10734 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10735 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10736 ins_encode %{
10737 __ lock();
10738 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10739 __ setcc(Assembler::equal, $res$$Register);
10740 %}
10741 ins_pipe( pipe_cmpxchg );
10742 %}
10743
10744 instruct compareAndExchangeB(
10745 memory mem_ptr,
10746 rax_RegI oldval, rRegI newval,
10747 rFlagsReg cr)
10748 %{
10749 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10750 effect(KILL cr);
10751
10752 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10753 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10754 ins_encode %{
10755 __ lock();
10756 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10757 %}
10758 ins_pipe( pipe_cmpxchg );
10759 %}
10760
10761 instruct compareAndExchangeS(
10762 memory mem_ptr,
10763 rax_RegI oldval, rRegI newval,
10764 rFlagsReg cr)
10765 %{
10766 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10767 effect(KILL cr);
10768
10769 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10770 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10771 ins_encode %{
10772 __ lock();
10773 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10774 %}
10775 ins_pipe( pipe_cmpxchg );
10776 %}
10777
10778 instruct compareAndExchangeI(
10779 memory mem_ptr,
10780 rax_RegI oldval, rRegI newval,
10781 rFlagsReg cr)
10782 %{
10783 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10784 effect(KILL cr);
10785
10786 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10787 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10788 ins_encode %{
10789 __ lock();
10790 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10791 %}
10792 ins_pipe( pipe_cmpxchg );
10793 %}
10794
10795 instruct compareAndExchangeL(
10796 memory mem_ptr,
10797 rax_RegL oldval, rRegL newval,
10798 rFlagsReg cr)
10799 %{
10800 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10801 effect(KILL cr);
10802
10803 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10804 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10805 ins_encode %{
10806 __ lock();
10807 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10808 %}
10809 ins_pipe( pipe_cmpxchg );
10810 %}
10811
10812 instruct compareAndExchangeN(
10813 memory mem_ptr,
10814 rax_RegN oldval, rRegN newval,
10815 rFlagsReg cr) %{
10816 predicate(n->as_LoadStore()->barrier_data() == 0);
10817 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10818 effect(KILL cr);
10819
10820 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10821 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10822 ins_encode %{
10823 __ lock();
10824 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10825 %}
10826 ins_pipe( pipe_cmpxchg );
10827 %}
10828
10829 instruct compareAndExchangeP(
10830 memory mem_ptr,
10831 rax_RegP oldval, rRegP newval,
10832 rFlagsReg cr)
10833 %{
10834 predicate(n->as_LoadStore()->barrier_data() == 0);
10835 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10836 effect(KILL cr);
10837
10838 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10839 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10840 ins_encode %{
10841 __ lock();
10842 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10843 %}
10844 ins_pipe( pipe_cmpxchg );
10845 %}
10846
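// "lock xadd" atomically adds the register into memory and returns the
// previous memory value in that register. When the old value is unused
// (result_not_used()), a plain locked add is cheaper, hence the *_no_res
// variants.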
10847 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10848 predicate(n->as_LoadStore()->result_not_used());
10849 match(Set dummy (GetAndAddB mem add));
10850 effect(KILL cr);
10851 format %{ "addb_lock $mem, $add" %}
10852 ins_encode %{
10853 __ lock();
10854 __ addb($mem$$Address, $add$$Register);
10855 %}
10856 ins_pipe(pipe_cmpxchg);
10857 %}
10858
10859 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10860 predicate(n->as_LoadStore()->result_not_used());
10861 match(Set dummy (GetAndAddB mem add));
10862 effect(KILL cr);
10863 format %{ "addb_lock $mem, $add" %}
10864 ins_encode %{
10865 __ lock();
10866 __ addb($mem$$Address, $add$$constant);
10867 %}
10868 ins_pipe(pipe_cmpxchg);
10869 %}
10870
10871 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10872 predicate(!n->as_LoadStore()->result_not_used());
10873 match(Set newval (GetAndAddB mem newval));
10874 effect(KILL cr);
10875 format %{ "xaddb_lock $mem, $newval" %}
10876 ins_encode %{
10877 __ lock();
10878 __ xaddb($mem$$Address, $newval$$Register);
10879 %}
10880 ins_pipe(pipe_cmpxchg);
10881 %}
10882
10883 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10884 predicate(n->as_LoadStore()->result_not_used());
10885 match(Set dummy (GetAndAddS mem add));
10886 effect(KILL cr);
10887 format %{ "addw_lock $mem, $add" %}
10888 ins_encode %{
10889 __ lock();
10890 __ addw($mem$$Address, $add$$Register);
10891 %}
10892 ins_pipe(pipe_cmpxchg);
10893 %}
10894
10895 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10896 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10897 match(Set dummy (GetAndAddS mem add));
10898 effect(KILL cr);
10899 format %{ "addw_lock $mem, $add" %}
10900 ins_encode %{
10901 __ lock();
10902 __ addw($mem$$Address, $add$$constant);
10903 %}
10904 ins_pipe(pipe_cmpxchg);
10905 %}
10906
10907 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10908 predicate(!n->as_LoadStore()->result_not_used());
10909 match(Set newval (GetAndAddS mem newval));
10910 effect(KILL cr);
10911 format %{ "xaddw_lock $mem, $newval" %}
10912 ins_encode %{
10913 __ lock();
10914 __ xaddw($mem$$Address, $newval$$Register);
10915 %}
10916 ins_pipe(pipe_cmpxchg);
10917 %}
10918
10919 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10920 predicate(n->as_LoadStore()->result_not_used());
10921 match(Set dummy (GetAndAddI mem add));
10922 effect(KILL cr);
10923 format %{ "addl_lock $mem, $add" %}
10924 ins_encode %{
10925 __ lock();
10926 __ addl($mem$$Address, $add$$Register);
10927 %}
10928 ins_pipe(pipe_cmpxchg);
10929 %}
10930
10931 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10932 predicate(n->as_LoadStore()->result_not_used());
10933 match(Set dummy (GetAndAddI mem add));
10934 effect(KILL cr);
10935 format %{ "addl_lock $mem, $add" %}
10936 ins_encode %{
10937 __ lock();
10938 __ addl($mem$$Address, $add$$constant);
10939 %}
10940 ins_pipe(pipe_cmpxchg);
10941 %}
10942
10943 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10944 predicate(!n->as_LoadStore()->result_not_used());
10945 match(Set newval (GetAndAddI mem newval));
10946 effect(KILL cr);
10947 format %{ "xaddl_lock $mem, $newval" %}
10948 ins_encode %{
10949 __ lock();
10950 __ xaddl($mem$$Address, $newval$$Register);
10951 %}
10952 ins_pipe(pipe_cmpxchg);
10953 %}
10954
10955 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10956 predicate(n->as_LoadStore()->result_not_used());
10957 match(Set dummy (GetAndAddL mem add));
10958 effect(KILL cr);
10959 format %{ "addq_lock $mem, $add" %}
10960 ins_encode %{
10961 __ lock();
10962 __ addq($mem$$Address, $add$$Register);
10963 %}
10964 ins_pipe(pipe_cmpxchg);
10965 %}
10966
10967 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10968 predicate(n->as_LoadStore()->result_not_used());
10969 match(Set dummy (GetAndAddL mem add));
10970 effect(KILL cr);
10971 format %{ "addq_lock $mem, $add" %}
10972 ins_encode %{
10973 __ lock();
10974 __ addq($mem$$Address, $add$$constant);
10975 %}
10976 ins_pipe(pipe_cmpxchg);
10977 %}
10978
10979 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
10980 predicate(!n->as_LoadStore()->result_not_used());
10981 match(Set newval (GetAndAddL mem newval));
10982 effect(KILL cr);
10983 format %{ "xaddq_lock $mem, $newval" %}
10984 ins_encode %{
10985 __ lock();
10986 __ xaddq($mem$$Address, $newval$$Register);
10987 %}
10988 ins_pipe(pipe_cmpxchg);
10989 %}
10990
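// xchg with a memory operand is implicitly locked, so unlike the xadd forms
// above these GetAndSet encodings need no explicit lock() prefix; xchg also
// leaves the flags alone, so no rFlagsReg is killed.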
10991 instruct xchgB( memory mem, rRegI newval) %{
10992 match(Set newval (GetAndSetB mem newval));
10993 format %{ "XCHGB $newval,[$mem]" %}
10994 ins_encode %{
10995 __ xchgb($newval$$Register, $mem$$Address);
10996 %}
10997 ins_pipe( pipe_cmpxchg );
10998 %}
10999
11000 instruct xchgS( memory mem, rRegI newval) %{
11001 match(Set newval (GetAndSetS mem newval));
11002 format %{ "XCHGW $newval,[$mem]" %}
11003 ins_encode %{
11004 __ xchgw($newval$$Register, $mem$$Address);
11005 %}
11006 ins_pipe( pipe_cmpxchg );
11007 %}
11008
11009 instruct xchgI( memory mem, rRegI newval) %{
11010 match(Set newval (GetAndSetI mem newval));
11011 format %{ "XCHGL $newval,[$mem]" %}
11012 ins_encode %{
11013 __ xchgl($newval$$Register, $mem$$Address);
11014 %}
11015 ins_pipe( pipe_cmpxchg );
11016 %}
11017
11018 instruct xchgL( memory mem, rRegL newval) %{
11019 match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ $newval,[$mem]" %}
11021 ins_encode %{
11022 __ xchgq($newval$$Register, $mem$$Address);
11023 %}
11024 ins_pipe( pipe_cmpxchg );
11025 %}
11026
11027 instruct xchgP( memory mem, rRegP newval) %{
11028 match(Set newval (GetAndSetP mem newval));
11029 predicate(n->as_LoadStore()->barrier_data() == 0);
11030 format %{ "XCHGQ $newval,[$mem]" %}
11031 ins_encode %{
11032 __ xchgq($newval$$Register, $mem$$Address);
11033 %}
11034 ins_pipe( pipe_cmpxchg );
11035 %}
11036
11037 instruct xchgN( memory mem, rRegN newval) %{
11038 predicate(n->as_LoadStore()->barrier_data() == 0);
11039 match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
11041 ins_encode %{
11042 __ xchgl($newval$$Register, $mem$$Address);
11043 %}
11044 ins_pipe( pipe_cmpxchg );
11045 %}
11046
11047 //----------Abs Instructions-------------------------------------------
11048
11049 // Integer Absolute Instructions
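// Branchless abs: negate into dst (xor + sub), then cmov the original value
// back when the negation came out negative, i.e. when src was positive:
//   dst = 0 - src;  if (dst < 0) dst = src;  // dst = |src|
// (For MIN_VALUE both paths produce MIN_VALUE, matching Java's Math.abs.)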
11050 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11051 %{
11052 match(Set dst (AbsI src));
11053 effect(TEMP dst, KILL cr);
11054 format %{ "xorl $dst, $dst\t# abs int\n\t"
11055 "subl $dst, $src\n\t"
11056 "cmovll $dst, $src" %}
11057 ins_encode %{
11058 __ xorl($dst$$Register, $dst$$Register);
11059 __ subl($dst$$Register, $src$$Register);
11060 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11061 %}
11062
11063 ins_pipe(ialu_reg_reg);
11064 %}
11065
11066 // Long Absolute Instructions
11067 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11068 %{
11069 match(Set dst (AbsL src));
11070 effect(TEMP dst, KILL cr);
11071 format %{ "xorl $dst, $dst\t# abs long\n\t"
11072 "subq $dst, $src\n\t"
11073 "cmovlq $dst, $src" %}
11074 ins_encode %{
11075 __ xorl($dst$$Register, $dst$$Register);
11076 __ subq($dst$$Register, $src$$Register);
11077 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11078 %}
11079
11080 ins_pipe(ialu_reg_reg);
11081 %}
11082
11083 //----------Subtraction Instructions-------------------------------------------
11084
11085 // Integer Subtraction Instructions
11086 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11087 %{
11088 predicate(!UseAPX);
11089 match(Set dst (SubI dst src));
11090 effect(KILL cr);
11091 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11092
11093 format %{ "subl $dst, $src\t# int" %}
11094 ins_encode %{
11095 __ subl($dst$$Register, $src$$Register);
11096 %}
11097 ins_pipe(ialu_reg_reg);
11098 %}
11099
11100 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11101 %{
11102 predicate(UseAPX);
11103 match(Set dst (SubI src1 src2));
11104 effect(KILL cr);
11105 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11106
11107 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11108 ins_encode %{
11109 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11110 %}
11111 ins_pipe(ialu_reg_reg);
11112 %}
11113
11114 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11115 %{
11116 predicate(UseAPX);
11117 match(Set dst (SubI src1 src2));
11118 effect(KILL cr);
11119 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11120
11121 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11122 ins_encode %{
11123 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11124 %}
11125 ins_pipe(ialu_reg_reg);
11126 %}
11127
11128 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11129 %{
11130 predicate(UseAPX);
11131 match(Set dst (SubI (LoadI src1) src2));
11132 effect(KILL cr);
11133 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11134
11135 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11136 ins_encode %{
11137 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11138 %}
11139 ins_pipe(ialu_reg_reg);
11140 %}
11141
11142 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11143 %{
11144 predicate(!UseAPX);
11145 match(Set dst (SubI dst (LoadI src)));
11146 effect(KILL cr);
11147 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11148
11149 ins_cost(150);
11150 format %{ "subl $dst, $src\t# int" %}
11151 ins_encode %{
11152 __ subl($dst$$Register, $src$$Address);
11153 %}
11154 ins_pipe(ialu_reg_mem);
11155 %}
11156
11157 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11158 %{
11159 predicate(UseAPX);
11160 match(Set dst (SubI src1 (LoadI src2)));
11161 effect(KILL cr);
11162 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11163
11164 ins_cost(150);
11165 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11166 ins_encode %{
11167 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11168 %}
11169 ins_pipe(ialu_reg_mem);
11170 %}
11171
11172 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11173 %{
11174 predicate(UseAPX);
11175 match(Set dst (SubI (LoadI src1) src2));
11176 effect(KILL cr);
11177 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11178
11179 ins_cost(150);
11180 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11181 ins_encode %{
11182 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11183 %}
11184 ins_pipe(ialu_reg_mem);
11185 %}
11186
11187 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11188 %{
11189 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11190 effect(KILL cr);
11191 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11192
11193 ins_cost(150);
11194 format %{ "subl $dst, $src\t# int" %}
11195 ins_encode %{
11196 __ subl($dst$$Address, $src$$Register);
11197 %}
11198 ins_pipe(ialu_mem_reg);
11199 %}
11200
11201 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11202 %{
11203 predicate(!UseAPX);
11204 match(Set dst (SubL dst src));
11205 effect(KILL cr);
11206 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11207
11208 format %{ "subq $dst, $src\t# long" %}
11209 ins_encode %{
11210 __ subq($dst$$Register, $src$$Register);
11211 %}
11212 ins_pipe(ialu_reg_reg);
11213 %}
11214
11215 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11216 %{
11217 predicate(UseAPX);
11218 match(Set dst (SubL src1 src2));
11219 effect(KILL cr);
11220 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11221
11222 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11223 ins_encode %{
11224 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11225 %}
11226 ins_pipe(ialu_reg_reg);
11227 %}
11228
11229 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11230 %{
11231 predicate(UseAPX);
11232 match(Set dst (SubL src1 src2));
11233 effect(KILL cr);
11234 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11235
11236 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11237 ins_encode %{
11238 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11239 %}
11240 ins_pipe(ialu_reg_reg);
11241 %}
11242
11243 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11244 %{
11245 predicate(UseAPX);
11246 match(Set dst (SubL (LoadL src1) src2));
11247 effect(KILL cr);
11248 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11249
11250 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11251 ins_encode %{
11252 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11253 %}
11254 ins_pipe(ialu_reg_reg);
11255 %}
11256
11257 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11258 %{
11259 predicate(!UseAPX);
11260 match(Set dst (SubL dst (LoadL src)));
11261 effect(KILL cr);
11262 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11263
11264 ins_cost(150);
11265 format %{ "subq $dst, $src\t# long" %}
11266 ins_encode %{
11267 __ subq($dst$$Register, $src$$Address);
11268 %}
11269 ins_pipe(ialu_reg_mem);
11270 %}
11271
11272 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11273 %{
11274 predicate(UseAPX);
11275 match(Set dst (SubL src1 (LoadL src2)));
11276 effect(KILL cr);
11277 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11278
11279 ins_cost(150);
11280 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11281 ins_encode %{
11282 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11283 %}
11284 ins_pipe(ialu_reg_mem);
11285 %}
11286
11287 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11288 %{
11289 predicate(UseAPX);
11290 match(Set dst (SubL (LoadL src1) src2));
11291 effect(KILL cr);
11292 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11293
11294 ins_cost(150);
11295 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11296 ins_encode %{
11297 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11298 %}
11299 ins_pipe(ialu_reg_mem);
11300 %}
11301
11302 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11303 %{
11304 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11305 effect(KILL cr);
11306 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11307
11308 ins_cost(150);
11309 format %{ "subq $dst, $src\t# long" %}
11310 ins_encode %{
11311 __ subq($dst$$Address, $src$$Register);
11312 %}
11313 ins_pipe(ialu_mem_reg);
11314 %}
11315
11316 // Subtract from a pointer
11317 // XXX hmpf???
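// C2 expresses (ptr - int) as (AddP dst (SubI 0 src)); rather than negating
// src and adding, the whole pattern is matched and emitted as a single subq.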
11318 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11319 %{
11320 match(Set dst (AddP dst (SubI zero src)));
11321 effect(KILL cr);
11322
11323 format %{ "subq $dst, $src\t# ptr - int" %}
11324 ins_encode %{
11325 __ subq($dst$$Register, $src$$Register);
11326 %}
11327 ins_pipe(ialu_reg_reg);
11328 %}
11329
11330 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11331 %{
11332 predicate(!UseAPX);
11333 match(Set dst (SubI zero dst));
11334 effect(KILL cr);
11335 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11336
11337 format %{ "negl $dst\t# int" %}
11338 ins_encode %{
11339 __ negl($dst$$Register);
11340 %}
11341 ins_pipe(ialu_reg);
11342 %}
11343
11344 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11345 %{
11346 predicate(UseAPX);
11347 match(Set dst (SubI zero src));
11348 effect(KILL cr);
11349 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11350
11351 format %{ "enegl $dst, $src\t# int ndd" %}
11352 ins_encode %{
11353 __ enegl($dst$$Register, $src$$Register, false);
11354 %}
11355 ins_pipe(ialu_reg);
11356 %}
11357
11358 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11359 %{
11360 predicate(!UseAPX);
11361 match(Set dst (NegI dst));
11362 effect(KILL cr);
11363 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11364
11365 format %{ "negl $dst\t# int" %}
11366 ins_encode %{
11367 __ negl($dst$$Register);
11368 %}
11369 ins_pipe(ialu_reg);
11370 %}
11371
11372 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11373 %{
11374 predicate(UseAPX);
11375 match(Set dst (NegI src));
11376 effect(KILL cr);
11377 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11378
11379 format %{ "enegl $dst, $src\t# int ndd" %}
11380 ins_encode %{
11381 __ enegl($dst$$Register, $src$$Register, false);
11382 %}
11383 ins_pipe(ialu_reg);
11384 %}
11385
11386 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11387 %{
11388 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11389 effect(KILL cr);
11390 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11391
11392 format %{ "negl $dst\t# int" %}
11393 ins_encode %{
11394 __ negl($dst$$Address);
11395 %}
11396 ins_pipe(ialu_reg);
11397 %}
11398
11399 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11400 %{
11401 predicate(!UseAPX);
11402 match(Set dst (SubL zero dst));
11403 effect(KILL cr);
11404 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11405
11406 format %{ "negq $dst\t# long" %}
11407 ins_encode %{
11408 __ negq($dst$$Register);
11409 %}
11410 ins_pipe(ialu_reg);
11411 %}
11412
11413 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11414 %{
11415 predicate(UseAPX);
11416 match(Set dst (SubL zero src));
11417 effect(KILL cr);
11418 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11419
11420 format %{ "enegq $dst, $src\t# long ndd" %}
11421 ins_encode %{
11422 __ enegq($dst$$Register, $src$$Register, false);
11423 %}
11424 ins_pipe(ialu_reg);
11425 %}
11426
11427 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11428 %{
11429 predicate(!UseAPX);
11430 match(Set dst (NegL dst));
11431 effect(KILL cr);
11432 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11433
  format %{ "negq $dst\t# long" %}
11435 ins_encode %{
11436 __ negq($dst$$Register);
11437 %}
11438 ins_pipe(ialu_reg);
11439 %}
11440
11441 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11442 %{
11443 predicate(UseAPX);
11444 match(Set dst (NegL src));
11445 effect(KILL cr);
11446 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11447
11448 format %{ "enegq $dst, $src\t# long ndd" %}
11449 ins_encode %{
11450 __ enegq($dst$$Register, $src$$Register, false);
11451 %}
11452 ins_pipe(ialu_reg);
11453 %}
11454
11455 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11456 %{
11457 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11458 effect(KILL cr);
11459 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11460
11461 format %{ "negq $dst\t# long" %}
11462 ins_encode %{
11463 __ negq($dst$$Address);
11464 %}
11465 ins_pipe(ialu_reg);
11466 %}
11467
11468 //----------Multiplication/Division Instructions-------------------------------
11469 // Integer Multiplication Instructions
11470 // Multiply Register
11471
11472 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11473 %{
11474 predicate(!UseAPX);
11475 match(Set dst (MulI dst src));
11476 effect(KILL cr);
11477
11478 ins_cost(300);
11479 format %{ "imull $dst, $src\t# int" %}
11480 ins_encode %{
11481 __ imull($dst$$Register, $src$$Register);
11482 %}
11483 ins_pipe(ialu_reg_reg_alu0);
11484 %}
11485
11486 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11487 %{
11488 predicate(UseAPX);
11489 match(Set dst (MulI src1 src2));
11490 effect(KILL cr);
11491 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11492
11493 ins_cost(300);
11494 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11495 ins_encode %{
11496 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11497 %}
11498 ins_pipe(ialu_reg_reg_alu0);
11499 %}
11500
11501 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11502 %{
11503 match(Set dst (MulI src imm));
11504 effect(KILL cr);
11505
11506 ins_cost(300);
11507 format %{ "imull $dst, $src, $imm\t# int" %}
11508 ins_encode %{
11509 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11510 %}
11511 ins_pipe(ialu_reg_reg_alu0);
11512 %}
11513
11514 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11515 %{
11516 predicate(!UseAPX);
11517 match(Set dst (MulI dst (LoadI src)));
11518 effect(KILL cr);
11519
11520 ins_cost(350);
11521 format %{ "imull $dst, $src\t# int" %}
11522 ins_encode %{
11523 __ imull($dst$$Register, $src$$Address);
11524 %}
11525 ins_pipe(ialu_reg_mem_alu0);
11526 %}
11527
11528 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11529 %{
11530 predicate(UseAPX);
11531 match(Set dst (MulI src1 (LoadI src2)));
11532 effect(KILL cr);
11533 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11534
11535 ins_cost(350);
11536 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11537 ins_encode %{
11538 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11539 %}
11540 ins_pipe(ialu_reg_mem_alu0);
11541 %}
11542
11543 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11544 %{
11545 match(Set dst (MulI (LoadI src) imm));
11546 effect(KILL cr);
11547
11548 ins_cost(300);
11549 format %{ "imull $dst, $src, $imm\t# int" %}
11550 ins_encode %{
11551 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11552 %}
11553 ins_pipe(ialu_reg_mem_alu0);
11554 %}
11555
11556 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11557 %{
11558 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11559 effect(KILL cr, KILL src2);
11560
  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
11564 %}
11565
11566 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11567 %{
11568 predicate(!UseAPX);
11569 match(Set dst (MulL dst src));
11570 effect(KILL cr);
11571
11572 ins_cost(300);
11573 format %{ "imulq $dst, $src\t# long" %}
11574 ins_encode %{
11575 __ imulq($dst$$Register, $src$$Register);
11576 %}
11577 ins_pipe(ialu_reg_reg_alu0);
11578 %}
11579
11580 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11581 %{
11582 predicate(UseAPX);
11583 match(Set dst (MulL src1 src2));
11584 effect(KILL cr);
11585 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11586
11587 ins_cost(300);
11588 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11589 ins_encode %{
11590 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11591 %}
11592 ins_pipe(ialu_reg_reg_alu0);
11593 %}
11594
11595 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11596 %{
11597 match(Set dst (MulL src imm));
11598 effect(KILL cr);
11599
11600 ins_cost(300);
11601 format %{ "imulq $dst, $src, $imm\t# long" %}
11602 ins_encode %{
11603 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11604 %}
11605 ins_pipe(ialu_reg_reg_alu0);
11606 %}
11607
11608 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11609 %{
11610 predicate(!UseAPX);
11611 match(Set dst (MulL dst (LoadL src)));
11612 effect(KILL cr);
11613
11614 ins_cost(350);
11615 format %{ "imulq $dst, $src\t# long" %}
11616 ins_encode %{
11617 __ imulq($dst$$Register, $src$$Address);
11618 %}
11619 ins_pipe(ialu_reg_mem_alu0);
11620 %}
11621
11622 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11623 %{
11624 predicate(UseAPX);
11625 match(Set dst (MulL src1 (LoadL src2)));
11626 effect(KILL cr);
11627 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11628
11629 ins_cost(350);
  format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11631 ins_encode %{
11632 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11633 %}
11634 ins_pipe(ialu_reg_mem_alu0);
11635 %}
11636
11637 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11638 %{
11639 match(Set dst (MulL (LoadL src) imm));
11640 effect(KILL cr);
11641
11642 ins_cost(300);
11643 format %{ "imulq $dst, $src, $imm\t# long" %}
11644 ins_encode %{
11645 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11646 %}
11647 ins_pipe(ialu_reg_mem_alu0);
11648 %}
11649
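// One-operand imulq/mulq compute the full 128-bit product into RDX:RAX;
// the MulHiL/UMulHiL nodes below keep only the high half in RDX.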
11650 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11651 %{
11652 match(Set dst (MulHiL src rax));
11653 effect(USE_KILL rax, KILL cr);
11654
11655 ins_cost(300);
11656 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11657 ins_encode %{
11658 __ imulq($src$$Register);
11659 %}
11660 ins_pipe(ialu_reg_reg_alu0);
11661 %}
11662
11663 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11664 %{
11665 match(Set dst (UMulHiL src rax));
11666 effect(USE_KILL rax, KILL cr);
11667
11668 ins_cost(300);
11669 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11670 ins_encode %{
11671 __ mulq($src$$Register);
11672 %}
11673 ins_pipe(ialu_reg_reg_alu0);
11674 %}
11675
11676 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11677 rFlagsReg cr)
11678 %{
11679 match(Set rax (DivI rax div));
11680 effect(KILL rdx, KILL cr);
11681
11682 ins_cost(30*100+10*100); // XXX
11683 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11684 "jne,s normal\n\t"
11685 "xorl rdx, rdx\n\t"
11686 "cmpl $div, -1\n\t"
11687 "je,s done\n"
11688 "normal: cdql\n\t"
11689 "idivl $div\n"
11690 "done:" %}
11691 ins_encode(cdql_enc(div));
11692 ins_pipe(ialu_reg_reg_alu0);
11693 %}
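
// The fast path above guards the one case where idivl would trap: the
// quotient of 0x80000000 / -1 does not fit in 32 bits and raises #DE, while
// Java defines Integer.MIN_VALUE / -1 == Integer.MIN_VALUE with remainder 0,
// so that dividend/divisor pair skips the divide, leaving rax unchanged and
// rdx zeroed. The long variants below do the same for 0x8000000000000000.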
11694
11695 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11696 rFlagsReg cr)
11697 %{
11698 match(Set rax (DivL rax div));
11699 effect(KILL rdx, KILL cr);
11700
11701 ins_cost(30*100+10*100); // XXX
11702 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11703 "cmpq rax, rdx\n\t"
11704 "jne,s normal\n\t"
11705 "xorl rdx, rdx\n\t"
11706 "cmpq $div, -1\n\t"
11707 "je,s done\n"
11708 "normal: cdqq\n\t"
11709 "idivq $div\n"
11710 "done:" %}
11711 ins_encode(cdqq_enc(div));
11712 ins_pipe(ialu_reg_reg_alu0);
11713 %}
11714
11715 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11716 %{
11717 match(Set rax (UDivI rax div));
11718 effect(KILL rdx, KILL cr);
11719
11720 ins_cost(300);
11721 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11722 ins_encode %{
11723 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11724 %}
11725 ins_pipe(ialu_reg_reg_alu0);
11726 %}
11727
11728 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11729 %{
11730 match(Set rax (UDivL rax div));
11731 effect(KILL rdx, KILL cr);
11732
11733 ins_cost(300);
11734 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11735 ins_encode %{
11736 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11737 %}
11738 ins_pipe(ialu_reg_reg_alu0);
11739 %}
11740
11741 // Integer DIVMOD with Register, both quotient and mod results
11742 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11743 rFlagsReg cr)
11744 %{
11745 match(DivModI rax div);
11746 effect(KILL cr);
11747
11748 ins_cost(30*100+10*100); // XXX
11749 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11750 "jne,s normal\n\t"
11751 "xorl rdx, rdx\n\t"
11752 "cmpl $div, -1\n\t"
11753 "je,s done\n"
11754 "normal: cdql\n\t"
11755 "idivl $div\n"
11756 "done:" %}
11757 ins_encode(cdql_enc(div));
11758 ins_pipe(pipe_slow);
11759 %}
11760
11761 // Long DIVMOD with Register, both quotient and mod results
11762 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11763 rFlagsReg cr)
11764 %{
11765 match(DivModL rax div);
11766 effect(KILL cr);
11767
11768 ins_cost(30*100+10*100); // XXX
11769 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11770 "cmpq rax, rdx\n\t"
11771 "jne,s normal\n\t"
11772 "xorl rdx, rdx\n\t"
11773 "cmpq $div, -1\n\t"
11774 "je,s done\n"
11775 "normal: cdqq\n\t"
11776 "idivq $div\n"
11777 "done:" %}
11778 ins_encode(cdqq_enc(div));
11779 ins_pipe(pipe_slow);
11780 %}
11781
11782 // Unsigned integer DIVMOD with Register, both quotient and mod results
11783 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11784 no_rax_rdx_RegI div, rFlagsReg cr)
11785 %{
11786 match(UDivModI rax div);
11787 effect(TEMP tmp, KILL cr);
11788
11789 ins_cost(300);
11790 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11791 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11792 %}
11793 ins_encode %{
11794 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11795 %}
11796 ins_pipe(pipe_slow);
11797 %}
11798
11799 // Unsigned long DIVMOD with Register, both quotient and mod results
11800 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11801 no_rax_rdx_RegL div, rFlagsReg cr)
11802 %{
11803 match(UDivModL rax div);
11804 effect(TEMP tmp, KILL cr);
11805
11806 ins_cost(300);
11807 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11808 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11809 %}
11810 ins_encode %{
11811 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11812 %}
11813 ins_pipe(pipe_slow);
11814 %}
11815
11816 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11817 rFlagsReg cr)
11818 %{
11819 match(Set rdx (ModI rax div));
11820 effect(KILL rax, KILL cr);
11821
11822 ins_cost(300); // XXX
11823 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11824 "jne,s normal\n\t"
11825 "xorl rdx, rdx\n\t"
11826 "cmpl $div, -1\n\t"
11827 "je,s done\n"
11828 "normal: cdql\n\t"
11829 "idivl $div\n"
11830 "done:" %}
11831 ins_encode(cdql_enc(div));
11832 ins_pipe(ialu_reg_reg_alu0);
11833 %}
11834
11835 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11836 rFlagsReg cr)
11837 %{
11838 match(Set rdx (ModL rax div));
11839 effect(KILL rax, KILL cr);
11840
11841 ins_cost(300); // XXX
11842 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11843 "cmpq rax, rdx\n\t"
11844 "jne,s normal\n\t"
11845 "xorl rdx, rdx\n\t"
11846 "cmpq $div, -1\n\t"
11847 "je,s done\n"
11848 "normal: cdqq\n\t"
11849 "idivq $div\n"
11850 "done:" %}
11851 ins_encode(cdqq_enc(div));
11852 ins_pipe(ialu_reg_reg_alu0);
11853 %}
11854
11855 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11856 %{
11857 match(Set rdx (UModI rax div));
11858 effect(KILL rax, KILL cr);
11859
11860 ins_cost(300);
11861 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11862 ins_encode %{
11863 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11864 %}
11865 ins_pipe(ialu_reg_reg_alu0);
11866 %}
11867
11868 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11869 %{
11870 match(Set rdx (UModL rax div));
11871 effect(KILL rax, KILL cr);
11872
11873 ins_cost(300);
11874 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11875 ins_encode %{
11876 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11877 %}
11878 ins_pipe(ialu_reg_reg_alu0);
11879 %}
11880
11881 // Integer Shift Instructions
11882 // Shift Left by one, two, three
11883 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11884 %{
11885 predicate(!UseAPX);
11886 match(Set dst (LShiftI dst shift));
11887 effect(KILL cr);
11888
11889 format %{ "sall $dst, $shift" %}
11890 ins_encode %{
11891 __ sall($dst$$Register, $shift$$constant);
11892 %}
11893 ins_pipe(ialu_reg);
11894 %}
11895
11896 // Shift Left by one, two, three
11897 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11898 %{
11899 predicate(UseAPX);
11900 match(Set dst (LShiftI src shift));
11901 effect(KILL cr);
11902 flag(PD::Flag_ndd_demotable_opr1);
11903
  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11905 ins_encode %{
11906 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11907 %}
11908 ins_pipe(ialu_reg);
11909 %}
11910
11911 // Shift Left by 8-bit immediate
11912 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11913 %{
11914 predicate(!UseAPX);
11915 match(Set dst (LShiftI dst shift));
11916 effect(KILL cr);
11917
11918 format %{ "sall $dst, $shift" %}
11919 ins_encode %{
11920 __ sall($dst$$Register, $shift$$constant);
11921 %}
11922 ins_pipe(ialu_reg);
11923 %}
11924
11925 // Shift Left by 8-bit immediate
11926 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11927 %{
11928 predicate(UseAPX);
11929 match(Set dst (LShiftI src shift));
11930 effect(KILL cr);
11931 flag(PD::Flag_ndd_demotable_opr1);
11932
11933 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11934 ins_encode %{
11935 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11936 %}
11937 ins_pipe(ialu_reg);
11938 %}
11939
11940 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11941 %{
11942 predicate(UseAPX);
11943 match(Set dst (LShiftI (LoadI src) shift));
11944 effect(KILL cr);
11945
11946 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11947 ins_encode %{
11948 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11949 %}
11950 ins_pipe(ialu_reg);
11951 %}
11952
11953 // Shift Left by 8-bit immediate
11954 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11955 %{
11956 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11957 effect(KILL cr);
11958
11959 format %{ "sall $dst, $shift" %}
11960 ins_encode %{
11961 __ sall($dst$$Address, $shift$$constant);
11962 %}
11963 ins_pipe(ialu_mem_imm);
11964 %}
11965
11966 // Shift Left by variable
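// Legacy variable shifts take the count implicitly in CL, hence the operand
// pinned to rcx_RegI and the KILL cr. The BMI2 forms further down (shlxl,
// sarxl, shrxl) accept the count in any register and do not touch the flags,
// so those variants need neither restriction.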
11967 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
11968 %{
11969 predicate(!VM_Version::supports_bmi2());
11970 match(Set dst (LShiftI dst shift));
11971 effect(KILL cr);
11972
11973 format %{ "sall $dst, $shift" %}
11974 ins_encode %{
11975 __ sall($dst$$Register);
11976 %}
11977 ins_pipe(ialu_reg_reg);
11978 %}
11979
11980 // Shift Left by variable
11981 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
11982 %{
11983 predicate(!VM_Version::supports_bmi2());
11984 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11985 effect(KILL cr);
11986
11987 format %{ "sall $dst, $shift" %}
11988 ins_encode %{
11989 __ sall($dst$$Address);
11990 %}
11991 ins_pipe(ialu_mem_reg);
11992 %}
11993
11994 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
11995 %{
11996 predicate(VM_Version::supports_bmi2());
11997 match(Set dst (LShiftI src shift));
11998
11999 format %{ "shlxl $dst, $src, $shift" %}
12000 ins_encode %{
12001 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12002 %}
12003 ins_pipe(ialu_reg_reg);
12004 %}
12005
12006 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12007 %{
12008 predicate(VM_Version::supports_bmi2());
12009 match(Set dst (LShiftI (LoadI src) shift));
12010 ins_cost(175);
12011 format %{ "shlxl $dst, $src, $shift" %}
12012 ins_encode %{
12013 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12014 %}
12015 ins_pipe(ialu_reg_mem);
12016 %}
12017
12018 // Arithmetic Shift Right by 8-bit immediate
12019 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12020 %{
12021 predicate(!UseAPX);
12022 match(Set dst (RShiftI dst shift));
12023 effect(KILL cr);
12024
12025 format %{ "sarl $dst, $shift" %}
12026 ins_encode %{
12027 __ sarl($dst$$Register, $shift$$constant);
12028 %}
12029 ins_pipe(ialu_mem_imm);
12030 %}
12031
12032 // Arithmetic Shift Right by 8-bit immediate
12033 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12034 %{
12035 predicate(UseAPX);
12036 match(Set dst (RShiftI src shift));
12037 effect(KILL cr);
12038 flag(PD::Flag_ndd_demotable_opr1);
12039
12040 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12041 ins_encode %{
12042 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12043 %}
12044 ins_pipe(ialu_mem_imm);
12045 %}
12046
12047 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12048 %{
12049 predicate(UseAPX);
12050 match(Set dst (RShiftI (LoadI src) shift));
12051 effect(KILL cr);
12052
12053 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12054 ins_encode %{
12055 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12056 %}
12057 ins_pipe(ialu_mem_imm);
12058 %}
12059
12060 // Arithmetic Shift Right by 8-bit immediate
12061 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12062 %{
12063 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12064 effect(KILL cr);
12065
12066 format %{ "sarl $dst, $shift" %}
12067 ins_encode %{
12068 __ sarl($dst$$Address, $shift$$constant);
12069 %}
12070 ins_pipe(ialu_mem_imm);
12071 %}
12072
12073 // Arithmetic Shift Right by variable
12074 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12075 %{
12076 predicate(!VM_Version::supports_bmi2());
12077 match(Set dst (RShiftI dst shift));
12078 effect(KILL cr);
12079
12080 format %{ "sarl $dst, $shift" %}
12081 ins_encode %{
12082 __ sarl($dst$$Register);
12083 %}
12084 ins_pipe(ialu_reg_reg);
12085 %}
12086
12087 // Arithmetic Shift Right by variable
12088 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12089 %{
12090 predicate(!VM_Version::supports_bmi2());
12091 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12092 effect(KILL cr);
12093
12094 format %{ "sarl $dst, $shift" %}
12095 ins_encode %{
12096 __ sarl($dst$$Address);
12097 %}
12098 ins_pipe(ialu_mem_reg);
12099 %}
12100
12101 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12102 %{
12103 predicate(VM_Version::supports_bmi2());
12104 match(Set dst (RShiftI src shift));
12105
12106 format %{ "sarxl $dst, $src, $shift" %}
12107 ins_encode %{
12108 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12109 %}
12110 ins_pipe(ialu_reg_reg);
12111 %}
12112
12113 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12114 %{
12115 predicate(VM_Version::supports_bmi2());
12116 match(Set dst (RShiftI (LoadI src) shift));
12117 ins_cost(175);
12118 format %{ "sarxl $dst, $src, $shift" %}
12119 ins_encode %{
12120 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12121 %}
12122 ins_pipe(ialu_reg_mem);
12123 %}
12124
12125 // Logical Shift Right by 8-bit immediate
12126 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12127 %{
12128 predicate(!UseAPX);
12129 match(Set dst (URShiftI dst shift));
12130 effect(KILL cr);
12131
12132 format %{ "shrl $dst, $shift" %}
12133 ins_encode %{
12134 __ shrl($dst$$Register, $shift$$constant);
12135 %}
12136 ins_pipe(ialu_reg);
12137 %}
12138
12139 // Logical Shift Right by 8-bit immediate
12140 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12141 %{
12142 predicate(UseAPX);
12143 match(Set dst (URShiftI src shift));
12144 effect(KILL cr);
12145 flag(PD::Flag_ndd_demotable_opr1);
12146
format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12148 ins_encode %{
12149 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12150 %}
12151 ins_pipe(ialu_reg);
12152 %}
12153
12154 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12155 %{
12156 predicate(UseAPX);
12157 match(Set dst (URShiftI (LoadI src) shift));
12158 effect(KILL cr);
12159
format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12161 ins_encode %{
12162 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12163 %}
12164 ins_pipe(ialu_reg);
12165 %}
12166
12167 // Logical Shift Right by 8-bit immediate
12168 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12169 %{
12170 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12171 effect(KILL cr);
12172
12173 format %{ "shrl $dst, $shift" %}
12174 ins_encode %{
12175 __ shrl($dst$$Address, $shift$$constant);
12176 %}
12177 ins_pipe(ialu_mem_imm);
12178 %}
12179
12180 // Logical Shift Right by variable
12181 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12182 %{
12183 predicate(!VM_Version::supports_bmi2());
12184 match(Set dst (URShiftI dst shift));
12185 effect(KILL cr);
12186
12187 format %{ "shrl $dst, $shift" %}
12188 ins_encode %{
12189 __ shrl($dst$$Register);
12190 %}
12191 ins_pipe(ialu_reg_reg);
12192 %}
12193
12194 // Logical Shift Right by variable
12195 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12196 %{
12197 predicate(!VM_Version::supports_bmi2());
12198 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12199 effect(KILL cr);
12200
12201 format %{ "shrl $dst, $shift" %}
12202 ins_encode %{
12203 __ shrl($dst$$Address);
12204 %}
12205 ins_pipe(ialu_mem_reg);
12206 %}
12207
12208 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12209 %{
12210 predicate(VM_Version::supports_bmi2());
12211 match(Set dst (URShiftI src shift));
12212
12213 format %{ "shrxl $dst, $src, $shift" %}
12214 ins_encode %{
12215 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12216 %}
12217 ins_pipe(ialu_reg_reg);
12218 %}
12219
12220 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12221 %{
12222 predicate(VM_Version::supports_bmi2());
12223 match(Set dst (URShiftI (LoadI src) shift));
12224 ins_cost(175);
12225 format %{ "shrxl $dst, $src, $shift" %}
12226 ins_encode %{
12227 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12228 %}
12229 ins_pipe(ialu_reg_mem);
12230 %}
12231
12232 // Long Shift Instructions
12233 // Shift Left by one, two, three
12234 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12235 %{
12236 predicate(!UseAPX);
12237 match(Set dst (LShiftL dst shift));
12238 effect(KILL cr);
12239
12240 format %{ "salq $dst, $shift" %}
12241 ins_encode %{
12242 __ salq($dst$$Register, $shift$$constant);
12243 %}
12244 ins_pipe(ialu_reg);
12245 %}
12246
12247 // Shift Left by one, two, three
12248 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12249 %{
12250 predicate(UseAPX);
12251 match(Set dst (LShiftL src shift));
12252 effect(KILL cr);
12253 flag(PD::Flag_ndd_demotable_opr1);
12254
12255 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12256 ins_encode %{
12257 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12258 %}
12259 ins_pipe(ialu_reg);
12260 %}
12261
12262 // Shift Left by 8-bit immediate
12263 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12264 %{
12265 predicate(!UseAPX);
12266 match(Set dst (LShiftL dst shift));
12267 effect(KILL cr);
12268
12269 format %{ "salq $dst, $shift" %}
12270 ins_encode %{
12271 __ salq($dst$$Register, $shift$$constant);
12272 %}
12273 ins_pipe(ialu_reg);
12274 %}
12275
12276 // Shift Left by 8-bit immediate
12277 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12278 %{
12279 predicate(UseAPX);
12280 match(Set dst (LShiftL src shift));
12281 effect(KILL cr);
12282 flag(PD::Flag_ndd_demotable_opr1);
12283
12284 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12285 ins_encode %{
12286 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12287 %}
12288 ins_pipe(ialu_reg);
12289 %}
12290
12291 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12292 %{
12293 predicate(UseAPX);
12294 match(Set dst (LShiftL (LoadL src) shift));
12295 effect(KILL cr);
12296
12297 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12298 ins_encode %{
12299 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12300 %}
12301 ins_pipe(ialu_reg);
12302 %}
12303
12304 // Shift Left by 8-bit immediate
12305 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12306 %{
12307 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12308 effect(KILL cr);
12309
12310 format %{ "salq $dst, $shift" %}
12311 ins_encode %{
12312 __ salq($dst$$Address, $shift$$constant);
12313 %}
12314 ins_pipe(ialu_mem_imm);
12315 %}
12316
12317 // Shift Left by variable
12318 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12319 %{
12320 predicate(!VM_Version::supports_bmi2());
12321 match(Set dst (LShiftL dst shift));
12322 effect(KILL cr);
12323
12324 format %{ "salq $dst, $shift" %}
12325 ins_encode %{
12326 __ salq($dst$$Register);
12327 %}
12328 ins_pipe(ialu_reg_reg);
12329 %}
12330
12331 // Shift Left by variable
12332 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12333 %{
12334 predicate(!VM_Version::supports_bmi2());
12335 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12336 effect(KILL cr);
12337
12338 format %{ "salq $dst, $shift" %}
12339 ins_encode %{
12340 __ salq($dst$$Address);
12341 %}
12342 ins_pipe(ialu_mem_reg);
12343 %}
12344
12345 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12346 %{
12347 predicate(VM_Version::supports_bmi2());
12348 match(Set dst (LShiftL src shift));
12349
12350 format %{ "shlxq $dst, $src, $shift" %}
12351 ins_encode %{
12352 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12353 %}
12354 ins_pipe(ialu_reg_reg);
12355 %}
12356
12357 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12358 %{
12359 predicate(VM_Version::supports_bmi2());
12360 match(Set dst (LShiftL (LoadL src) shift));
12361 ins_cost(175);
12362 format %{ "shlxq $dst, $src, $shift" %}
12363 ins_encode %{
12364 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12365 %}
12366 ins_pipe(ialu_reg_mem);
12367 %}
12368
12369 // Arithmetic Shift Right by 8-bit immediate
12370 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12371 %{
12372 predicate(!UseAPX);
12373 match(Set dst (RShiftL dst shift));
12374 effect(KILL cr);
12375
12376 format %{ "sarq $dst, $shift" %}
12377 ins_encode %{
12378 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12379 %}
12380 ins_pipe(ialu_mem_imm);
12381 %}
12382
12383 // Arithmetic Shift Right by 8-bit immediate
12384 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12385 %{
12386 predicate(UseAPX);
12387 match(Set dst (RShiftL src shift));
12388 effect(KILL cr);
12389 flag(PD::Flag_ndd_demotable_opr1);
12390
12391 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12392 ins_encode %{
12393 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12394 %}
12395 ins_pipe(ialu_mem_imm);
12396 %}
12397
12398 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12399 %{
12400 predicate(UseAPX);
12401 match(Set dst (RShiftL (LoadL src) shift));
12402 effect(KILL cr);
12403
12404 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12405 ins_encode %{
12406 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12407 %}
12408 ins_pipe(ialu_mem_imm);
12409 %}
12410
12411 // Arithmetic Shift Right by 8-bit immediate
12412 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12413 %{
12414 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12415 effect(KILL cr);
12416
12417 format %{ "sarq $dst, $shift" %}
12418 ins_encode %{
12419 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12420 %}
12421 ins_pipe(ialu_mem_imm);
12422 %}
12423
12424 // Arithmetic Shift Right by variable
12425 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12426 %{
12427 predicate(!VM_Version::supports_bmi2());
12428 match(Set dst (RShiftL dst shift));
12429 effect(KILL cr);
12430
12431 format %{ "sarq $dst, $shift" %}
12432 ins_encode %{
12433 __ sarq($dst$$Register);
12434 %}
12435 ins_pipe(ialu_reg_reg);
12436 %}
12437
12438 // Arithmetic Shift Right by variable
12439 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12440 %{
12441 predicate(!VM_Version::supports_bmi2());
12442 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12443 effect(KILL cr);
12444
12445 format %{ "sarq $dst, $shift" %}
12446 ins_encode %{
12447 __ sarq($dst$$Address);
12448 %}
12449 ins_pipe(ialu_mem_reg);
12450 %}
12451
12452 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12453 %{
12454 predicate(VM_Version::supports_bmi2());
12455 match(Set dst (RShiftL src shift));
12456
12457 format %{ "sarxq $dst, $src, $shift" %}
12458 ins_encode %{
12459 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12460 %}
12461 ins_pipe(ialu_reg_reg);
12462 %}
12463
12464 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12465 %{
12466 predicate(VM_Version::supports_bmi2());
12467 match(Set dst (RShiftL (LoadL src) shift));
12468 ins_cost(175);
12469 format %{ "sarxq $dst, $src, $shift" %}
12470 ins_encode %{
12471 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12472 %}
12473 ins_pipe(ialu_reg_mem);
12474 %}
12475
12476 // Logical Shift Right by 8-bit immediate
12477 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12478 %{
12479 predicate(!UseAPX);
12480 match(Set dst (URShiftL dst shift));
12481 effect(KILL cr);
12482
12483 format %{ "shrq $dst, $shift" %}
12484 ins_encode %{
12485 __ shrq($dst$$Register, $shift$$constant);
12486 %}
12487 ins_pipe(ialu_reg);
12488 %}
12489
12490 // Logical Shift Right by 8-bit immediate
12491 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12492 %{
12493 predicate(UseAPX);
12494 match(Set dst (URShiftL src shift));
12495 effect(KILL cr);
12496 flag(PD::Flag_ndd_demotable_opr1);
12497
12498 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12499 ins_encode %{
12500 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12501 %}
12502 ins_pipe(ialu_reg);
12503 %}
12504
12505 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12506 %{
12507 predicate(UseAPX);
12508 match(Set dst (URShiftL (LoadL src) shift));
12509 effect(KILL cr);
12510
12511 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12512 ins_encode %{
12513 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12514 %}
12515 ins_pipe(ialu_reg);
12516 %}
12517
12518 // Logical Shift Right by 8-bit immediate
12519 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12520 %{
12521 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12522 effect(KILL cr);
12523
12524 format %{ "shrq $dst, $shift" %}
12525 ins_encode %{
12526 __ shrq($dst$$Address, $shift$$constant);
12527 %}
12528 ins_pipe(ialu_mem_imm);
12529 %}
12530
12531 // Logical Shift Right by variable
12532 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12533 %{
12534 predicate(!VM_Version::supports_bmi2());
12535 match(Set dst (URShiftL dst shift));
12536 effect(KILL cr);
12537
12538 format %{ "shrq $dst, $shift" %}
12539 ins_encode %{
12540 __ shrq($dst$$Register);
12541 %}
12542 ins_pipe(ialu_reg_reg);
12543 %}
12544
12545 // Logical Shift Right by variable
12546 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12547 %{
12548 predicate(!VM_Version::supports_bmi2());
12549 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12550 effect(KILL cr);
12551
12552 format %{ "shrq $dst, $shift" %}
12553 ins_encode %{
12554 __ shrq($dst$$Address);
12555 %}
12556 ins_pipe(ialu_mem_reg);
12557 %}
12558
12559 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12560 %{
12561 predicate(VM_Version::supports_bmi2());
12562 match(Set dst (URShiftL src shift));
12563
12564 format %{ "shrxq $dst, $src, $shift" %}
12565 ins_encode %{
12566 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12567 %}
12568 ins_pipe(ialu_reg_reg);
12569 %}
12570
12571 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12572 %{
12573 predicate(VM_Version::supports_bmi2());
12574 match(Set dst (URShiftL (LoadL src) shift));
12575 ins_cost(175);
12576 format %{ "shrxq $dst, $src, $shift" %}
12577 ins_encode %{
12578 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12579 %}
12580 ins_pipe(ialu_reg_mem);
12581 %}
12582
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
12584 // This idiom is used by the compiler for the i2b bytecode.
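// For example, a Java (byte) cast reaches the matcher as (x << 24) >> 24 and
// collapses into a single sign-extending movsbl.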
12585 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12586 %{
12587 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12588
12589 format %{ "movsbl $dst, $src\t# i2b" %}
12590 ins_encode %{
12591 __ movsbl($dst$$Register, $src$$Register);
12592 %}
12593 ins_pipe(ialu_reg_reg);
12594 %}
12595
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
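// As with i2b, the shift pair collapses into one sign-extending movswl.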
12598 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12599 %{
12600 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12601
12602 format %{ "movswl $dst, $src\t# i2s" %}
12603 ins_encode %{
12604 __ movswl($dst$$Register, $src$$Register);
12605 %}
12606 ins_pipe(ialu_reg_reg);
12607 %}
12608
12609 // ROL/ROR instructions
12610
12611 // Rotate left by constant.
12612 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12613 %{
12614 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12615 match(Set dst (RotateLeft dst shift));
12616 effect(KILL cr);
12617 format %{ "roll $dst, $shift" %}
12618 ins_encode %{
12619 __ roll($dst$$Register, $shift$$constant);
12620 %}
12621 ins_pipe(ialu_reg);
12622 %}
12623
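// Rotate Left by constant using BMI2. There is no rolx instruction, so a
// rotate-left by n is emitted as rorx by (32 - n); rorx is non-destructive
// and does not modify flags.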
12624 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12625 %{
12626 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12627 match(Set dst (RotateLeft src shift));
12628 format %{ "rolxl $dst, $src, $shift" %}
12629 ins_encode %{
12630 int shift = 32 - ($shift$$constant & 31);
12631 __ rorxl($dst$$Register, $src$$Register, shift);
12632 %}
12633 ins_pipe(ialu_reg_reg);
12634 %}
12635
12636 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12637 %{
12638 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12639 match(Set dst (RotateLeft (LoadI src) shift));
12640 ins_cost(175);
12641 format %{ "rolxl $dst, $src, $shift" %}
12642 ins_encode %{
12643 int shift = 32 - ($shift$$constant & 31);
12644 __ rorxl($dst$$Register, $src$$Address, shift);
12645 %}
12646 ins_pipe(ialu_reg_mem);
12647 %}
12648
12649 // Rotate Left by variable
12650 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12651 %{
12652 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12653 match(Set dst (RotateLeft dst shift));
12654 effect(KILL cr);
12655 format %{ "roll $dst, $shift" %}
12656 ins_encode %{
12657 __ roll($dst$$Register);
12658 %}
12659 ins_pipe(ialu_reg_reg);
12660 %}
12661
12662 // Rotate Left by variable
12663 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12664 %{
12665 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12666 match(Set dst (RotateLeft src shift));
12667 effect(KILL cr);
12668 flag(PD::Flag_ndd_demotable_opr1);
12669
12670 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12671 ins_encode %{
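// The rotate amount is taken implicitly from CL (the rcx_RegI shift operand).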
12672 __ eroll($dst$$Register, $src$$Register, false);
12673 %}
12674 ins_pipe(ialu_reg_reg);
12675 %}
12676
12677 // Rotate Right by constant.
12678 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12679 %{
12680 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12681 match(Set dst (RotateRight dst shift));
12682 effect(KILL cr);
12683 format %{ "rorl $dst, $shift" %}
12684 ins_encode %{
12685 __ rorl($dst$$Register, $shift$$constant);
12686 %}
12687 ins_pipe(ialu_reg);
12688 %}
12689
12690 // Rotate Right by constant.
12691 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12692 %{
12693 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12694 match(Set dst (RotateRight src shift));
12695 format %{ "rorxl $dst, $src, $shift" %}
12696 ins_encode %{
12697 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12698 %}
12699 ins_pipe(ialu_reg_reg);
12700 %}
12701
12702 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12703 %{
12704 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12705 match(Set dst (RotateRight (LoadI src) shift));
12706 ins_cost(175);
12707 format %{ "rorxl $dst, $src, $shift" %}
12708 ins_encode %{
12709 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12710 %}
12711 ins_pipe(ialu_reg_mem);
12712 %}
12713
12714 // Rotate Right by variable
12715 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12716 %{
12717 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12718 match(Set dst (RotateRight dst shift));
12719 effect(KILL cr);
12720 format %{ "rorl $dst, $shift" %}
12721 ins_encode %{
12722 __ rorl($dst$$Register);
12723 %}
12724 ins_pipe(ialu_reg_reg);
12725 %}
12726
12727 // Rotate Right by variable
12728 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12729 %{
12730 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12731 match(Set dst (RotateRight src shift));
12732 effect(KILL cr);
12733 flag(PD::Flag_ndd_demotable_opr1);
12734
format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12736 ins_encode %{
12737 __ erorl($dst$$Register, $src$$Register, false);
12738 %}
12739 ins_pipe(ialu_reg_reg);
12740 %}
12741
12742 // Rotate Left by constant.
12743 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12744 %{
12745 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12746 match(Set dst (RotateLeft dst shift));
12747 effect(KILL cr);
12748 format %{ "rolq $dst, $shift" %}
12749 ins_encode %{
12750 __ rolq($dst$$Register, $shift$$constant);
12751 %}
12752 ins_pipe(ialu_reg);
12753 %}
12754
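// As in the 32-bit case, a rotate-left by n is emitted as rorx by (64 - n).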
12755 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12756 %{
12757 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12758 match(Set dst (RotateLeft src shift));
12759 format %{ "rolxq $dst, $src, $shift" %}
12760 ins_encode %{
12761 int shift = 64 - ($shift$$constant & 63);
12762 __ rorxq($dst$$Register, $src$$Register, shift);
12763 %}
12764 ins_pipe(ialu_reg_reg);
12765 %}
12766
12767 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12768 %{
12769 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12770 match(Set dst (RotateLeft (LoadL src) shift));
12771 ins_cost(175);
12772 format %{ "rolxq $dst, $src, $shift" %}
12773 ins_encode %{
12774 int shift = 64 - ($shift$$constant & 63);
12775 __ rorxq($dst$$Register, $src$$Address, shift);
12776 %}
12777 ins_pipe(ialu_reg_mem);
12778 %}
12779
12780 // Rotate Left by variable
12781 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12782 %{
12783 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12784 match(Set dst (RotateLeft dst shift));
12785 effect(KILL cr);
12786
12787 format %{ "rolq $dst, $shift" %}
12788 ins_encode %{
12789 __ rolq($dst$$Register);
12790 %}
12791 ins_pipe(ialu_reg_reg);
12792 %}
12793
12794 // Rotate Left by variable
12795 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12796 %{
12797 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12798 match(Set dst (RotateLeft src shift));
12799 effect(KILL cr);
12800 flag(PD::Flag_ndd_demotable_opr1);
12801
format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12803 ins_encode %{
12804 __ erolq($dst$$Register, $src$$Register, false);
12805 %}
12806 ins_pipe(ialu_reg_reg);
12807 %}
12808
12809 // Rotate Right by constant.
12810 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12811 %{
12812 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12813 match(Set dst (RotateRight dst shift));
12814 effect(KILL cr);
12815 format %{ "rorq $dst, $shift" %}
12816 ins_encode %{
12817 __ rorq($dst$$Register, $shift$$constant);
12818 %}
12819 ins_pipe(ialu_reg);
12820 %}
12821
12822 // Rotate Right by constant
12823 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12824 %{
12825 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12826 match(Set dst (RotateRight src shift));
12827 format %{ "rorxq $dst, $src, $shift" %}
12828 ins_encode %{
12829 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12830 %}
12831 ins_pipe(ialu_reg_reg);
12832 %}
12833
12834 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12835 %{
12836 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12837 match(Set dst (RotateRight (LoadL src) shift));
12838 ins_cost(175);
12839 format %{ "rorxq $dst, $src, $shift" %}
12840 ins_encode %{
12841 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12842 %}
12843 ins_pipe(ialu_reg_mem);
12844 %}
12845
12846 // Rotate Right by variable
12847 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12848 %{
12849 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12850 match(Set dst (RotateRight dst shift));
12851 effect(KILL cr);
12852 format %{ "rorq $dst, $shift" %}
12853 ins_encode %{
12854 __ rorq($dst$$Register);
12855 %}
12856 ins_pipe(ialu_reg_reg);
12857 %}
12858
12859 // Rotate Right by variable
12860 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12861 %{
12862 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12863 match(Set dst (RotateRight src shift));
12864 effect(KILL cr);
12865 flag(PD::Flag_ndd_demotable_opr1);
12866
format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12868 ins_encode %{
12869 __ erorq($dst$$Register, $src$$Register, false);
12870 %}
12871 ins_pipe(ialu_reg_reg);
12872 %}
12873
12874 //----------------------------- CompressBits/ExpandBits ------------------------
12875
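// pext gathers the src bits selected by mask into the contiguous low-order
// bits of dst; pdep is the inverse, scattering the low-order bits of src to
// the positions selected by mask. For example, with mask = 0b1010, pext packs
// src bits 3 and 1 into dst bits 1 and 0, and pdep moves them back.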
12876 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12877 predicate(n->bottom_type()->isa_long());
12878 match(Set dst (CompressBits src mask));
12879 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12880 ins_encode %{
12881 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12882 %}
12883 ins_pipe( pipe_slow );
12884 %}
12885
12886 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12887 predicate(n->bottom_type()->isa_long());
12888 match(Set dst (ExpandBits src mask));
12889 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12890 ins_encode %{
12891 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12892 %}
12893 ins_pipe( pipe_slow );
12894 %}
12895
12896 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12897 predicate(n->bottom_type()->isa_long());
12898 match(Set dst (CompressBits src (LoadL mask)));
12899 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12900 ins_encode %{
12901 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12902 %}
12903 ins_pipe( pipe_slow );
12904 %}
12905
12906 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12907 predicate(n->bottom_type()->isa_long());
12908 match(Set dst (ExpandBits src (LoadL mask)));
12909 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12910 ins_encode %{
12911 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12912 %}
12913 ins_pipe( pipe_slow );
12914 %}
12915
12916
12917 // Logical Instructions
12918
12919 // Integer Logical Instructions
12920
12921 // And Instructions
12922 // And Register with Register
12923 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12924 %{
12925 predicate(!UseAPX);
12926 match(Set dst (AndI dst src));
12927 effect(KILL cr);
12928 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12929
12930 format %{ "andl $dst, $src\t# int" %}
12931 ins_encode %{
12932 __ andl($dst$$Register, $src$$Register);
12933 %}
12934 ins_pipe(ialu_reg_reg);
12935 %}
12936
12937 // And Register with Register using New Data Destination (NDD)
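// NDD forms write their result to a distinct destination register instead of
// overwriting the first source. The trailing boolean passed to the assembler
// selects the APX no-flags (NF) form; false here keeps the usual
// flag-updating encoding.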
12938 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12939 %{
12940 predicate(UseAPX);
12941 match(Set dst (AndI src1 src2));
12942 effect(KILL cr);
12943 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12944
12945 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12946 ins_encode %{
__ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
12950 ins_pipe(ialu_reg_reg);
12951 %}
12952
12953 // And Register with Immediate 255
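// Masking with 0xFF is strength-reduced to a zero-extending movzbl, which
// needs no flags effect.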
12954 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12955 %{
12956 match(Set dst (AndI src mask));
12957
12958 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12959 ins_encode %{
12960 __ movzbl($dst$$Register, $src$$Register);
12961 %}
12962 ins_pipe(ialu_reg);
12963 %}
12964
12965 // And Register with Immediate 255 and promote to long
12966 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12967 %{
12968 match(Set dst (ConvI2L (AndI src mask)));
12969
12970 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
12971 ins_encode %{
12972 __ movzbl($dst$$Register, $src$$Register);
12973 %}
12974 ins_pipe(ialu_reg);
12975 %}
12976
12977 // And Register with Immediate 65535
12978 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12979 %{
12980 match(Set dst (AndI src mask));
12981
12982 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
12983 ins_encode %{
12984 __ movzwl($dst$$Register, $src$$Register);
12985 %}
12986 ins_pipe(ialu_reg);
12987 %}
12988
12989 // And Register with Immediate 65535 and promote to long
12990 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12991 %{
12992 match(Set dst (ConvI2L (AndI src mask)));
12993
12994 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
12995 ins_encode %{
12996 __ movzwl($dst$$Register, $src$$Register);
12997 %}
12998 ins_pipe(ialu_reg);
12999 %}
13000
13001 // Can skip int2long conversions after AND with small bitmask
13002 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13003 %{
13004 predicate(VM_Version::supports_bmi2());
13005 ins_cost(125);
13006 effect(TEMP tmp, KILL cr);
13007 match(Set dst (ConvI2L (AndI src mask)));
format %{ "bzhiq $dst, $src, $mask\t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13009 ins_encode %{
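// bzhiq clears every bit of $src at position $tmp and above, so with
// $tmp = log2($mask + 1) the result equals ($src & $mask), already
// zero-extended to 64 bits.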
13010 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13011 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13012 %}
13013 ins_pipe(ialu_reg_reg);
13014 %}
13015
13016 // And Register with Immediate
13017 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13018 %{
13019 predicate(!UseAPX);
13020 match(Set dst (AndI dst src));
13021 effect(KILL cr);
13022 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13023
13024 format %{ "andl $dst, $src\t# int" %}
13025 ins_encode %{
13026 __ andl($dst$$Register, $src$$constant);
13027 %}
13028 ins_pipe(ialu_reg);
13029 %}
13030
13031 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13032 %{
13033 predicate(UseAPX);
13034 match(Set dst (AndI src1 src2));
13035 effect(KILL cr);
13036 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13037
13038 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13039 ins_encode %{
13040 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13041 %}
13042 ins_pipe(ialu_reg);
13043 %}
13044
13045 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13046 %{
13047 predicate(UseAPX);
13048 match(Set dst (AndI (LoadI src1) src2));
13049 effect(KILL cr);
13050 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13051
13052 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13053 ins_encode %{
13054 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13055 %}
13056 ins_pipe(ialu_reg);
13057 %}
13058
13059 // And Register with Memory
13060 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13061 %{
13062 predicate(!UseAPX);
13063 match(Set dst (AndI dst (LoadI src)));
13064 effect(KILL cr);
13065 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13066
13067 ins_cost(150);
13068 format %{ "andl $dst, $src\t# int" %}
13069 ins_encode %{
13070 __ andl($dst$$Register, $src$$Address);
13071 %}
13072 ins_pipe(ialu_reg_mem);
13073 %}
13074
13075 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13076 %{
13077 predicate(UseAPX);
13078 match(Set dst (AndI src1 (LoadI src2)));
13079 effect(KILL cr);
13080 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13081
13082 ins_cost(150);
13083 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13084 ins_encode %{
13085 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13086 %}
13087 ins_pipe(ialu_reg_mem);
13088 %}
13089
13090 // And Memory with Register
13091 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13092 %{
13093 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13094 effect(KILL cr);
13095 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13096
13097 ins_cost(150);
13098 format %{ "andb $dst, $src\t# byte" %}
13099 ins_encode %{
13100 __ andb($dst$$Address, $src$$Register);
13101 %}
13102 ins_pipe(ialu_mem_reg);
13103 %}
13104
13105 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13106 %{
13107 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13108 effect(KILL cr);
13109 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13110
13111 ins_cost(150);
13112 format %{ "andl $dst, $src\t# int" %}
13113 ins_encode %{
13114 __ andl($dst$$Address, $src$$Register);
13115 %}
13116 ins_pipe(ialu_mem_reg);
13117 %}
13118
13119 // And Memory with Immediate
13120 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13121 %{
13122 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13123 effect(KILL cr);
13124 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13125
13126 ins_cost(125);
13127 format %{ "andl $dst, $src\t# int" %}
13128 ins_encode %{
13129 __ andl($dst$$Address, $src$$constant);
13130 %}
13131 ins_pipe(ialu_mem_imm);
13132 %}
13133
13134 // BMI1 instructions
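// andn computes dst = ~src1 & src2 in one instruction, letting the matcher
// fold the (XorI src1 -1) not-then-and pattern without a temporary.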
13135 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13136 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13137 predicate(UseBMI1Instructions);
13138 effect(KILL cr);
13139 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13140
13141 ins_cost(125);
13142 format %{ "andnl $dst, $src1, $src2" %}
13143
13144 ins_encode %{
13145 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13146 %}
13147 ins_pipe(ialu_reg_mem);
13148 %}
13149
13150 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13151 match(Set dst (AndI (XorI src1 minus_1) src2));
13152 predicate(UseBMI1Instructions);
13153 effect(KILL cr);
13154 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13155
13156 format %{ "andnl $dst, $src1, $src2" %}
13157
13158 ins_encode %{
13159 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13160 %}
13161 ins_pipe(ialu_reg);
13162 %}
13163
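// blsi isolates the lowest set bit: dst = src & -src.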
13164 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13165 match(Set dst (AndI (SubI imm_zero src) src));
13166 predicate(UseBMI1Instructions);
13167 effect(KILL cr);
13168 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13169
13170 format %{ "blsil $dst, $src" %}
13171
13172 ins_encode %{
13173 __ blsil($dst$$Register, $src$$Register);
13174 %}
13175 ins_pipe(ialu_reg);
13176 %}
13177
13178 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13179 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13180 predicate(UseBMI1Instructions);
13181 effect(KILL cr);
13182 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13183
13184 ins_cost(125);
13185 format %{ "blsil $dst, $src" %}
13186
13187 ins_encode %{
13188 __ blsil($dst$$Register, $src$$Address);
13189 %}
13190 ins_pipe(ialu_reg_mem);
13191 %}
13192
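// blsmsk builds a mask up to and including the lowest set bit:
// dst = src ^ (src - 1).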
13193 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13194 %{
13195 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13196 predicate(UseBMI1Instructions);
13197 effect(KILL cr);
13198 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13199
13200 ins_cost(125);
13201 format %{ "blsmskl $dst, $src" %}
13202
13203 ins_encode %{
13204 __ blsmskl($dst$$Register, $src$$Address);
13205 %}
13206 ins_pipe(ialu_reg_mem);
13207 %}
13208
13209 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13210 %{
13211 match(Set dst (XorI (AddI src minus_1) src));
13212 predicate(UseBMI1Instructions);
13213 effect(KILL cr);
13214 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13215
13216 format %{ "blsmskl $dst, $src" %}
13217
13218 ins_encode %{
13219 __ blsmskl($dst$$Register, $src$$Register);
13220 %}
13221
13222 ins_pipe(ialu_reg);
13223 %}
13224
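// blsr clears the lowest set bit: dst = src & (src - 1).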
13225 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13226 %{
13227 match(Set dst (AndI (AddI src minus_1) src) );
13228 predicate(UseBMI1Instructions);
13229 effect(KILL cr);
13230 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13231
13232 format %{ "blsrl $dst, $src" %}
13233
13234 ins_encode %{
13235 __ blsrl($dst$$Register, $src$$Register);
13236 %}
13237
ins_pipe(ialu_reg);
13239 %}
13240
13241 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13242 %{
13243 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13244 predicate(UseBMI1Instructions);
13245 effect(KILL cr);
13246 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13247
13248 ins_cost(125);
13249 format %{ "blsrl $dst, $src" %}
13250
13251 ins_encode %{
13252 __ blsrl($dst$$Register, $src$$Address);
13253 %}
13254
ins_pipe(ialu_reg_mem);
13256 %}
13257
13258 // Or Instructions
13259 // Or Register with Register
13260 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13261 %{
13262 predicate(!UseAPX);
13263 match(Set dst (OrI dst src));
13264 effect(KILL cr);
13265 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13266
13267 format %{ "orl $dst, $src\t# int" %}
13268 ins_encode %{
13269 __ orl($dst$$Register, $src$$Register);
13270 %}
13271 ins_pipe(ialu_reg_reg);
13272 %}
13273
13274 // Or Register with Register using New Data Destination (NDD)
13275 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13276 %{
13277 predicate(UseAPX);
13278 match(Set dst (OrI src1 src2));
13279 effect(KILL cr);
13280 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13281
13282 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13283 ins_encode %{
13284 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13285 %}
13286 ins_pipe(ialu_reg_reg);
13287 %}
13288
13289 // Or Register with Immediate
13290 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13291 %{
13292 predicate(!UseAPX);
13293 match(Set dst (OrI dst src));
13294 effect(KILL cr);
13295 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13296
13297 format %{ "orl $dst, $src\t# int" %}
13298 ins_encode %{
13299 __ orl($dst$$Register, $src$$constant);
13300 %}
13301 ins_pipe(ialu_reg);
13302 %}
13303
13304 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13305 %{
13306 predicate(UseAPX);
13307 match(Set dst (OrI src1 src2));
13308 effect(KILL cr);
13309 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13310
13311 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13312 ins_encode %{
13313 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13314 %}
13315 ins_pipe(ialu_reg);
13316 %}
13317
13318 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13319 %{
13320 predicate(UseAPX);
13321 match(Set dst (OrI src1 src2));
13322 effect(KILL cr);
13323 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13324
13325 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13326 ins_encode %{
13327 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13328 %}
13329 ins_pipe(ialu_reg);
13330 %}
13331
13332 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13333 %{
13334 predicate(UseAPX);
13335 match(Set dst (OrI (LoadI src1) src2));
13336 effect(KILL cr);
13337 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13338
13339 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13340 ins_encode %{
13341 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13342 %}
13343 ins_pipe(ialu_reg);
13344 %}
13345
13346 // Or Register with Memory
13347 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13348 %{
13349 predicate(!UseAPX);
13350 match(Set dst (OrI dst (LoadI src)));
13351 effect(KILL cr);
13352 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13353
13354 ins_cost(150);
13355 format %{ "orl $dst, $src\t# int" %}
13356 ins_encode %{
13357 __ orl($dst$$Register, $src$$Address);
13358 %}
13359 ins_pipe(ialu_reg_mem);
13360 %}
13361
13362 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13363 %{
13364 predicate(UseAPX);
13365 match(Set dst (OrI src1 (LoadI src2)));
13366 effect(KILL cr);
13367 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13368
13369 ins_cost(150);
13370 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13371 ins_encode %{
13372 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13373 %}
13374 ins_pipe(ialu_reg_mem);
13375 %}
13376
13377 // Or Memory with Register
13378 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13379 %{
13380 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13381 effect(KILL cr);
13382 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13383
13384 ins_cost(150);
13385 format %{ "orb $dst, $src\t# byte" %}
13386 ins_encode %{
13387 __ orb($dst$$Address, $src$$Register);
13388 %}
13389 ins_pipe(ialu_mem_reg);
13390 %}
13391
13392 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13393 %{
13394 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13395 effect(KILL cr);
13396 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13397
13398 ins_cost(150);
13399 format %{ "orl $dst, $src\t# int" %}
13400 ins_encode %{
13401 __ orl($dst$$Address, $src$$Register);
13402 %}
13403 ins_pipe(ialu_mem_reg);
13404 %}
13405
13406 // Or Memory with Immediate
13407 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13408 %{
13409 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13410 effect(KILL cr);
13411 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13412
13413 ins_cost(125);
13414 format %{ "orl $dst, $src\t# int" %}
13415 ins_encode %{
13416 __ orl($dst$$Address, $src$$constant);
13417 %}
13418 ins_pipe(ialu_mem_imm);
13419 %}
13420
13421 // Xor Instructions
13422 // Xor Register with Register
13423 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13424 %{
13425 predicate(!UseAPX);
13426 match(Set dst (XorI dst src));
13427 effect(KILL cr);
13428 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13429
13430 format %{ "xorl $dst, $src\t# int" %}
13431 ins_encode %{
13432 __ xorl($dst$$Register, $src$$Register);
13433 %}
13434 ins_pipe(ialu_reg_reg);
13435 %}
13436
13437 // Xor Register with Register using New Data Destination (NDD)
13438 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13439 %{
13440 predicate(UseAPX);
13441 match(Set dst (XorI src1 src2));
13442 effect(KILL cr);
13443 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13444
13445 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13446 ins_encode %{
13447 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13448 %}
13449 ins_pipe(ialu_reg_reg);
13450 %}
13451
13452 // Xor Register with Immediate -1
13453 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13454 %{
13455 predicate(!UseAPX);
13456 match(Set dst (XorI dst imm));
13457
13458 format %{ "notl $dst" %}
13459 ins_encode %{
13460 __ notl($dst$$Register);
13461 %}
13462 ins_pipe(ialu_reg);
13463 %}
13464
13465 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13466 %{
13467 match(Set dst (XorI src imm));
13468 predicate(UseAPX);
13469 flag(PD::Flag_ndd_demotable_opr1);
13470
13471 format %{ "enotl $dst, $src" %}
13472 ins_encode %{
13473 __ enotl($dst$$Register, $src$$Register);
13474 %}
13475 ins_pipe(ialu_reg);
13476 %}
13477
13478 // Xor Register with Immediate
13479 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13480 %{
// Strict predicate check: exclude -1 here so that xorI_rReg_im1 is selected regardless of cost.
13482 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13483 match(Set dst (XorI dst src));
13484 effect(KILL cr);
13485 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13486
13487 format %{ "xorl $dst, $src\t# int" %}
13488 ins_encode %{
13489 __ xorl($dst$$Register, $src$$constant);
13490 %}
13491 ins_pipe(ialu_reg);
13492 %}
13493
13494 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13495 %{
// Strict predicate check: exclude -1 here so that xorI_rReg_im1_ndd is selected regardless of cost.
13497 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13498 match(Set dst (XorI src1 src2));
13499 effect(KILL cr);
13500 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13501
13502 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13503 ins_encode %{
13504 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13505 %}
13506 ins_pipe(ialu_reg);
13507 %}
13508
13509 // Xor Memory with Immediate
13510 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13511 %{
13512 predicate(UseAPX);
13513 match(Set dst (XorI (LoadI src1) src2));
13514 effect(KILL cr);
13515 ins_cost(150);
13516 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13517
13518 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13519 ins_encode %{
13520 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13521 %}
13522 ins_pipe(ialu_reg);
13523 %}
13524
13525 // Xor Register with Memory
13526 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13527 %{
13528 predicate(!UseAPX);
13529 match(Set dst (XorI dst (LoadI src)));
13530 effect(KILL cr);
13531 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13532
13533 ins_cost(150);
13534 format %{ "xorl $dst, $src\t# int" %}
13535 ins_encode %{
13536 __ xorl($dst$$Register, $src$$Address);
13537 %}
13538 ins_pipe(ialu_reg_mem);
13539 %}
13540
13541 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13542 %{
13543 predicate(UseAPX);
13544 match(Set dst (XorI src1 (LoadI src2)));
13545 effect(KILL cr);
13546 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13547
13548 ins_cost(150);
13549 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13550 ins_encode %{
13551 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13552 %}
13553 ins_pipe(ialu_reg_mem);
13554 %}
13555
13556 // Xor Memory with Register
13557 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13558 %{
13559 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13560 effect(KILL cr);
13561 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13562
13563 ins_cost(150);
13564 format %{ "xorb $dst, $src\t# byte" %}
13565 ins_encode %{
13566 __ xorb($dst$$Address, $src$$Register);
13567 %}
13568 ins_pipe(ialu_mem_reg);
13569 %}
13570
13571 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13572 %{
13573 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13574 effect(KILL cr);
13575 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13576
13577 ins_cost(150);
13578 format %{ "xorl $dst, $src\t# int" %}
13579 ins_encode %{
13580 __ xorl($dst$$Address, $src$$Register);
13581 %}
13582 ins_pipe(ialu_mem_reg);
13583 %}
13584
13585 // Xor Memory with Immediate
13586 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13587 %{
13588 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13589 effect(KILL cr);
13590 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13591
13592 ins_cost(125);
13593 format %{ "xorl $dst, $src\t# int" %}
13594 ins_encode %{
13595 __ xorl($dst$$Address, $src$$constant);
13596 %}
13597 ins_pipe(ialu_mem_imm);
13598 %}
13599
13600
13601 // Long Logical Instructions
13602
13603 // And Instructions
13604 // And Register with Register
13605 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13606 %{
13607 predicate(!UseAPX);
13608 match(Set dst (AndL dst src));
13609 effect(KILL cr);
13610 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13611
13612 format %{ "andq $dst, $src\t# long" %}
13613 ins_encode %{
13614 __ andq($dst$$Register, $src$$Register);
13615 %}
13616 ins_pipe(ialu_reg_reg);
13617 %}
13618
13619 // And Register with Register using New Data Destination (NDD)
13620 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13621 %{
13622 predicate(UseAPX);
13623 match(Set dst (AndL src1 src2));
13624 effect(KILL cr);
13625 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13626
13627 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13628 ins_encode %{
__ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
%}
13632 ins_pipe(ialu_reg_reg);
13633 %}
13634
13635 // And Register with Immediate 255
13636 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13637 %{
13638 match(Set dst (AndL src mask));
13639
13640 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13641 ins_encode %{
// movzbl zeroes out the upper 32 bits and does not need REX.W
13643 __ movzbl($dst$$Register, $src$$Register);
13644 %}
13645 ins_pipe(ialu_reg);
13646 %}
13647
13648 // And Register with Immediate 65535
13649 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13650 %{
13651 match(Set dst (AndL src mask));
13652
13653 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13654 ins_encode %{
// movzwl zeroes out the upper 32 bits and does not need REX.W
13656 __ movzwl($dst$$Register, $src$$Register);
13657 %}
13658 ins_pipe(ialu_reg);
13659 %}
13660
13661 // And Register with Immediate
13662 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13663 %{
13664 predicate(!UseAPX);
13665 match(Set dst (AndL dst src));
13666 effect(KILL cr);
13667 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13668
13669 format %{ "andq $dst, $src\t# long" %}
13670 ins_encode %{
13671 __ andq($dst$$Register, $src$$constant);
13672 %}
13673 ins_pipe(ialu_reg);
13674 %}
13675
13676 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13677 %{
13678 predicate(UseAPX);
13679 match(Set dst (AndL src1 src2));
13680 effect(KILL cr);
13681 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13682
13683 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13684 ins_encode %{
13685 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13686 %}
13687 ins_pipe(ialu_reg);
13688 %}
13689
13690 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13691 %{
13692 predicate(UseAPX);
13693 match(Set dst (AndL (LoadL src1) src2));
13694 effect(KILL cr);
13695 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13696
13697 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13698 ins_encode %{
13699 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13700 %}
13701 ins_pipe(ialu_reg);
13702 %}
13703
13704 // And Register with Memory
13705 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13706 %{
13707 predicate(!UseAPX);
13708 match(Set dst (AndL dst (LoadL src)));
13709 effect(KILL cr);
13710 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13711
13712 ins_cost(150);
13713 format %{ "andq $dst, $src\t# long" %}
13714 ins_encode %{
13715 __ andq($dst$$Register, $src$$Address);
13716 %}
13717 ins_pipe(ialu_reg_mem);
13718 %}
13719
13720 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13721 %{
13722 predicate(UseAPX);
13723 match(Set dst (AndL src1 (LoadL src2)));
13724 effect(KILL cr);
13725 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13726
13727 ins_cost(150);
13728 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13729 ins_encode %{
13730 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13731 %}
13732 ins_pipe(ialu_reg_mem);
13733 %}
13734
13735 // And Memory with Register
13736 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13737 %{
13738 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13739 effect(KILL cr);
13740 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13741
13742 ins_cost(150);
13743 format %{ "andq $dst, $src\t# long" %}
13744 ins_encode %{
13745 __ andq($dst$$Address, $src$$Register);
13746 %}
13747 ins_pipe(ialu_mem_reg);
13748 %}
13749
13750 // And Memory with Immediate
13751 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13752 %{
13753 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13754 effect(KILL cr);
13755 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13756
13757 ins_cost(125);
13758 format %{ "andq $dst, $src\t# long" %}
13759 ins_encode %{
13760 __ andq($dst$$Address, $src$$constant);
13761 %}
13762 ins_pipe(ialu_mem_imm);
13763 %}
13764
13765 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13766 %{
13767 // con should be a pure 64-bit immediate given that not(con) is a power of 2
13768 // because AND/OR works well enough for 8/32-bit values.
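// For example, con = ~(1L << 40) clears bit 40 with a single btrq instead of
// materializing the 64-bit mask in a register.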
13769 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13770
13771 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13772 effect(KILL cr);
13773
13774 ins_cost(125);
13775 format %{ "btrq $dst, log2(not($con))\t# long" %}
13776 ins_encode %{
13777 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13778 %}
13779 ins_pipe(ialu_mem_imm);
13780 %}
13781
13782 // BMI1 instructions
13783 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13784 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13785 predicate(UseBMI1Instructions);
13786 effect(KILL cr);
13787 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13788
13789 ins_cost(125);
13790 format %{ "andnq $dst, $src1, $src2" %}
13791
13792 ins_encode %{
13793 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13794 %}
13795 ins_pipe(ialu_reg_mem);
13796 %}
13797
13798 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13799 match(Set dst (AndL (XorL src1 minus_1) src2));
13800 predicate(UseBMI1Instructions);
13801 effect(KILL cr);
13802 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13803
13804 format %{ "andnq $dst, $src1, $src2" %}
13805
13806 ins_encode %{
13807 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13808 %}
13809 ins_pipe(ialu_reg_mem);
13810 %}
13811
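// blsi isolates the lowest set bit: dst = src & -src, which is exactly
// what the (AndL (SubL 0 src) src) subtree computes.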
13812 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13813 match(Set dst (AndL (SubL imm_zero src) src));
13814 predicate(UseBMI1Instructions);
13815 effect(KILL cr);
13816 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13817
13818 format %{ "blsiq $dst, $src" %}
13819
13820 ins_encode %{
13821 __ blsiq($dst$$Register, $src$$Register);
13822 %}
13823 ins_pipe(ialu_reg);
13824 %}
13825
13826 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13827 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13828 predicate(UseBMI1Instructions);
13829 effect(KILL cr);
13830 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13831
13832 ins_cost(125);
13833 format %{ "blsiq $dst, $src" %}
13834
13835 ins_encode %{
13836 __ blsiq($dst$$Register, $src$$Address);
13837 %}
13838 ins_pipe(ialu_reg_mem);
13839 %}
13840
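// blsmsk produces a mask up to and including the lowest set bit:
// dst = src ^ (src - 1), matching the (XorL (AddL src -1) src) subtree.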
13841 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13842 %{
13843 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13844 predicate(UseBMI1Instructions);
13845 effect(KILL cr);
13846 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13847
13848 ins_cost(125);
13849 format %{ "blsmskq $dst, $src" %}
13850
13851 ins_encode %{
13852 __ blsmskq($dst$$Register, $src$$Address);
13853 %}
13854 ins_pipe(ialu_reg_mem);
13855 %}
13856
13857 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13858 %{
13859 match(Set dst (XorL (AddL src minus_1) src));
13860 predicate(UseBMI1Instructions);
13861 effect(KILL cr);
13862 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13863
13864 format %{ "blsmskq $dst, $src" %}
13865
13866 ins_encode %{
13867 __ blsmskq($dst$$Register, $src$$Register);
13868 %}
13869
13870 ins_pipe(ialu_reg);
13871 %}
13872
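// blsr clears the lowest set bit: dst = src & (src - 1), matching the
// (AndL (AddL src -1) src) subtree.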
13873 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13874 %{
13875 match(Set dst (AndL (AddL src minus_1) src) );
13876 predicate(UseBMI1Instructions);
13877 effect(KILL cr);
13878 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13879
13880 format %{ "blsrq $dst, $src" %}
13881
13882 ins_encode %{
13883 __ blsrq($dst$$Register, $src$$Register);
13884 %}
13885
13886 ins_pipe(ialu_reg);
13887 %}
13888
13889 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13890 %{
13891 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13892 predicate(UseBMI1Instructions);
13893 effect(KILL cr);
13894 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13895
13896 ins_cost(125);
13897 format %{ "blsrq $dst, $src" %}
13898
13899 ins_encode %{
13900 __ blsrq($dst$$Register, $src$$Address);
13901 %}
13902
13903 ins_pipe(ialu_reg);
13904 %}
13905
13906 // Or Instructions
13907 // Or Register with Register
13908 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13909 %{
13910 predicate(!UseAPX);
13911 match(Set dst (OrL dst src));
13912 effect(KILL cr);
13913 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13914
13915 format %{ "orq $dst, $src\t# long" %}
13916 ins_encode %{
13917 __ orq($dst$$Register, $src$$Register);
13918 %}
13919 ins_pipe(ialu_reg_reg);
13920 %}
13921
13922 // Or Register with Register using New Data Destination (NDD)
13923 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13924 %{
13925 predicate(UseAPX);
13926 match(Set dst (OrL src1 src2));
13927 effect(KILL cr);
13928 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13929
13930 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13931 ins_encode %{
13932 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13934 %}
13935 ins_pipe(ialu_reg_reg);
13936 %}
13937
13938 // Use any_RegP to match R15 (TLS register) without spilling.
13939 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13940 match(Set dst (OrL dst (CastP2X src)));
13941 effect(KILL cr);
13942 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13943
13944 format %{ "orq $dst, $src\t# long" %}
13945 ins_encode %{
13946 __ orq($dst$$Register, $src$$Register);
13947 %}
13948 ins_pipe(ialu_reg_reg);
13949 %}
13950
13951 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13952 match(Set dst (OrL src1 (CastP2X src2)));
13953 effect(KILL cr);
13954 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13955
13956 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13957 ins_encode %{
13958 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13959 %}
13960 ins_pipe(ialu_reg_reg);
13961 %}
13962
13963 // Or Register with Immediate
13964 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13965 %{
13966 predicate(!UseAPX);
13967 match(Set dst (OrL dst src));
13968 effect(KILL cr);
13969 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13970
13971 format %{ "orq $dst, $src\t# long" %}
13972 ins_encode %{
13973 __ orq($dst$$Register, $src$$constant);
13974 %}
13975 ins_pipe(ialu_reg);
13976 %}
13977
13978 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13979 %{
13980 predicate(UseAPX);
13981 match(Set dst (OrL src1 src2));
13982 effect(KILL cr);
13983 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13984
13985 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13986 ins_encode %{
13987 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
13988 %}
13989 ins_pipe(ialu_reg);
13990 %}
13991
13992 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
13993 %{
13994 predicate(UseAPX);
13995 match(Set dst (OrL src1 src2));
13996 effect(KILL cr);
13997 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13998
13999 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14000 ins_encode %{
14001 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14002 %}
14003 ins_pipe(ialu_reg);
14004 %}
14005
// Or Memory with Immediate into Register (NDD)
14007 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14008 %{
14009 predicate(UseAPX);
14010 match(Set dst (OrL (LoadL src1) src2));
14011 effect(KILL cr);
14012 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14013
14014 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14015 ins_encode %{
14016 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14017 %}
14018 ins_pipe(ialu_reg);
14019 %}
14020
14021 // Or Register with Memory
14022 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14023 %{
14024 predicate(!UseAPX);
14025 match(Set dst (OrL dst (LoadL src)));
14026 effect(KILL cr);
14027 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14028
14029 ins_cost(150);
14030 format %{ "orq $dst, $src\t# long" %}
14031 ins_encode %{
14032 __ orq($dst$$Register, $src$$Address);
14033 %}
14034 ins_pipe(ialu_reg_mem);
14035 %}
14036
14037 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14038 %{
14039 predicate(UseAPX);
14040 match(Set dst (OrL src1 (LoadL src2)));
14041 effect(KILL cr);
14042 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14043
14044 ins_cost(150);
14045 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14046 ins_encode %{
14047 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14048 %}
14049 ins_pipe(ialu_reg_mem);
14050 %}
14051
14052 // Or Memory with Register
14053 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14054 %{
14055 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14056 effect(KILL cr);
14057 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14058
14059 ins_cost(150);
14060 format %{ "orq $dst, $src\t# long" %}
14061 ins_encode %{
14062 __ orq($dst$$Address, $src$$Register);
14063 %}
14064 ins_pipe(ialu_mem_reg);
14065 %}
14066
14067 // Or Memory with Immediate
14068 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14069 %{
14070 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14071 effect(KILL cr);
14072 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14073
14074 ins_cost(125);
14075 format %{ "orq $dst, $src\t# long" %}
14076 ins_encode %{
14077 __ orq($dst$$Address, $src$$constant);
14078 %}
14079 ins_pipe(ialu_mem_imm);
14080 %}
14081
14082 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14083 %{
14084 // con should be a pure 64-bit power of 2 immediate
14085 // because AND/OR works well enough for 8/32-bit values.
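// For example, con = 1L << 40 gives log2i_graceful(con) = 40 (> 31), so a
// single "btsq [dst], 40" sets that bit.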
14086 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14087
14088 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14089 effect(KILL cr);
14090
14091 ins_cost(125);
14092 format %{ "btsq $dst, log2($con)\t# long" %}
14093 ins_encode %{
14094 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14095 %}
14096 ins_pipe(ialu_mem_imm);
14097 %}
14098
14099 // Xor Instructions
14100 // Xor Register with Register
14101 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14102 %{
14103 predicate(!UseAPX);
14104 match(Set dst (XorL dst src));
14105 effect(KILL cr);
14106 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14107
14108 format %{ "xorq $dst, $src\t# long" %}
14109 ins_encode %{
14110 __ xorq($dst$$Register, $src$$Register);
14111 %}
14112 ins_pipe(ialu_reg_reg);
14113 %}
14114
14115 // Xor Register with Register using New Data Destination (NDD)
14116 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14117 %{
14118 predicate(UseAPX);
14119 match(Set dst (XorL src1 src2));
14120 effect(KILL cr);
14121 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14122
14123 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14124 ins_encode %{
14125 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14126 %}
14127 ins_pipe(ialu_reg_reg);
14128 %}
14129
14130 // Xor Register with Immediate -1
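// Xor with -1 is a bitwise NOT; notq does not modify the flags, so these
// rules need no rFlagsReg effect.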
14131 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14132 %{
14133 predicate(!UseAPX);
14134 match(Set dst (XorL dst imm));
14135
14136 format %{ "notq $dst" %}
14137 ins_encode %{
14138 __ notq($dst$$Register);
14139 %}
14140 ins_pipe(ialu_reg);
14141 %}
14142
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14144 %{
14145 predicate(UseAPX);
14146 match(Set dst (XorL src imm));
14147 flag(PD::Flag_ndd_demotable_opr1);
14148
14149 format %{ "enotq $dst, $src" %}
14150 ins_encode %{
14151 __ enotq($dst$$Register, $src$$Register);
14152 %}
14153 ins_pipe(ialu_reg);
14154 %}
14155
14156 // Xor Register with Immediate
14157 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14158 %{
// Strict predicate check so that selection of xorL_rReg_im1 is cost-agnostic when immL32 src is -1.
14160 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14161 match(Set dst (XorL dst src));
14162 effect(KILL cr);
14163 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14164
14165 format %{ "xorq $dst, $src\t# long" %}
14166 ins_encode %{
14167 __ xorq($dst$$Register, $src$$constant);
14168 %}
14169 ins_pipe(ialu_reg);
14170 %}
14171
14172 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14173 %{
// Strict predicate check so that selection of xorL_rReg_im1_ndd is cost-agnostic when immL32 src2 is -1.
14175 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14176 match(Set dst (XorL src1 src2));
14177 effect(KILL cr);
14178 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14179
14180 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14181 ins_encode %{
14182 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14183 %}
14184 ins_pipe(ialu_reg);
14185 %}
14186
// Xor Memory with Immediate into Register (NDD)
14188 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14189 %{
14190 predicate(UseAPX);
14191 match(Set dst (XorL (LoadL src1) src2));
14192 effect(KILL cr);
14193 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14194 ins_cost(150);
14195
14196 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14197 ins_encode %{
14198 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14199 %}
14200 ins_pipe(ialu_reg);
14201 %}
14202
14203 // Xor Register with Memory
14204 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14205 %{
14206 predicate(!UseAPX);
14207 match(Set dst (XorL dst (LoadL src)));
14208 effect(KILL cr);
14209 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14210
14211 ins_cost(150);
14212 format %{ "xorq $dst, $src\t# long" %}
14213 ins_encode %{
14214 __ xorq($dst$$Register, $src$$Address);
14215 %}
14216 ins_pipe(ialu_reg_mem);
14217 %}
14218
14219 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14220 %{
14221 predicate(UseAPX);
14222 match(Set dst (XorL src1 (LoadL src2)));
14223 effect(KILL cr);
14224 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14225
14226 ins_cost(150);
14227 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14228 ins_encode %{
14229 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14230 %}
14231 ins_pipe(ialu_reg_mem);
14232 %}
14233
14234 // Xor Memory with Register
14235 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14236 %{
14237 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14238 effect(KILL cr);
14239 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14240
14241 ins_cost(150);
14242 format %{ "xorq $dst, $src\t# long" %}
14243 ins_encode %{
14244 __ xorq($dst$$Address, $src$$Register);
14245 %}
14246 ins_pipe(ialu_mem_reg);
14247 %}
14248
14249 // Xor Memory with Immediate
14250 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14251 %{
14252 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14253 effect(KILL cr);
14254 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14255
14256 ins_cost(125);
14257 format %{ "xorq $dst, $src\t# long" %}
14258 ins_encode %{
14259 __ xorq($dst$$Address, $src$$constant);
14260 %}
14261 ins_pipe(ialu_mem_imm);
14262 %}
14263
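// cmpLTMask computes dst = (p < q) ? -1 : 0: setcc leaves 0 or 1 in dst
// and the negl turns the 1 into all-ones.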
14264 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14265 %{
14266 match(Set dst (CmpLTMask p q));
14267 effect(KILL cr);
14268
14269 ins_cost(400);
14270 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
"setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
"negl $dst" %}
14273 ins_encode %{
14274 __ cmpl($p$$Register, $q$$Register);
14275 __ setcc(Assembler::less, $dst$$Register);
14276 __ negl($dst$$Register);
14277 %}
14278 ins_pipe(pipe_slow);
14279 %}
14280
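// With q == 0 the mask is just the sign bit of dst replicated, so an
// arithmetic right shift by 31 suffices.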
14281 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14282 %{
14283 match(Set dst (CmpLTMask dst zero));
14284 effect(KILL cr);
14285
14286 ins_cost(100);
14287 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14288 ins_encode %{
14289 __ sarl($dst$$Register, 31);
14290 %}
14291 ins_pipe(ialu_reg);
14292 %}
14293
14294 /* Better to save a register than avoid a branch */
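// Computes p = (p - q) + ((p < q) ? y : 0) without materializing the
// mask: subl sets the flags and the addl is skipped when p >= q.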
14295 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14296 %{
14297 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14298 effect(KILL cr);
14299 ins_cost(300);
14300 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14301 "jge done\n\t"
14302 "addl $p,$y\n"
14303 "done: " %}
14304 ins_encode %{
14305 Register Rp = $p$$Register;
14306 Register Rq = $q$$Register;
14307 Register Ry = $y$$Register;
14308 Label done;
14309 __ subl(Rp, Rq);
14310 __ jccb(Assembler::greaterEqual, done);
14311 __ addl(Rp, Ry);
14312 __ bind(done);
14313 %}
14314 ins_pipe(pipe_cmplt);
14315 %}
14316
14317 /* Better to save a register than avoid a branch */
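// Computes y = (p < q) ? y : 0: y is kept when the mask would be
// all-ones and zeroed otherwise.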
14318 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14319 %{
14320 match(Set y (AndI (CmpLTMask p q) y));
14321 effect(KILL cr);
14322
14323 ins_cost(300);
14324
14325 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
"jl done\n\t"
14327 "xorl $y, $y\n"
14328 "done: " %}
14329 ins_encode %{
14330 Register Rp = $p$$Register;
14331 Register Rq = $q$$Register;
14332 Register Ry = $y$$Register;
14333 Label done;
14334 __ cmpl(Rp, Rq);
14335 __ jccb(Assembler::less, done);
14336 __ xorl(Ry, Ry);
14337 __ bind(done);
14338 %}
14339 ins_pipe(pipe_cmplt);
14340 %}
14341
14342
14343 //---------- FP Instructions------------------------------------------------
14344
14345 // Really expensive, avoid
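// ucomiss sets ZF, PF and CF for an unordered (NaN) operand; the fixup
// below rewrites the saved flags so that only CF survives, making NaN
// order as "less than" for consumers of $cr.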
14346 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14347 %{
14348 match(Set cr (CmpF src1 src2));
14349
14350 ins_cost(500);
14351 format %{ "ucomiss $src1, $src2\n\t"
14352 "jnp,s exit\n\t"
14353 "pushfq\t# saw NaN, set CF\n\t"
14354 "andq [rsp], #0xffffff2b\n\t"
14355 "popfq\n"
14356 "exit:" %}
14357 ins_encode %{
14358 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14359 emit_cmpfp_fixup(masm);
14360 %}
14361 ins_pipe(pipe_slow);
14362 %}
14363
14364 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14365 match(Set cr (CmpF src1 src2));
14366
14367 ins_cost(100);
14368 format %{ "ucomiss $src1, $src2" %}
14369 ins_encode %{
14370 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14371 %}
14372 ins_pipe(pipe_slow);
14373 %}
14374
14375 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14376 match(Set cr (CmpF src1 src2));
14377
14378 ins_cost(100);
14379 format %{ "vucomxss $src1, $src2" %}
14380 ins_encode %{
14381 __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14382 %}
14383 ins_pipe(pipe_slow);
14384 %}
14385
14386 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14387 match(Set cr (CmpF src1 (LoadF src2)));
14388
14389 ins_cost(100);
14390 format %{ "ucomiss $src1, $src2" %}
14391 ins_encode %{
14392 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14393 %}
14394 ins_pipe(pipe_slow);
14395 %}
14396
14397 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14398 match(Set cr (CmpF src1 (LoadF src2)));
14399
14400 ins_cost(100);
14401 format %{ "vucomxss $src1, $src2" %}
14402 ins_encode %{
14403 __ vucomxss($src1$$XMMRegister, $src2$$Address);
14404 %}
14405 ins_pipe(pipe_slow);
14406 %}
14407
14408 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14409 match(Set cr (CmpF src con));
14410
14411 ins_cost(100);
14412 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14413 ins_encode %{
14414 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14415 %}
14416 ins_pipe(pipe_slow);
14417 %}
14418
14419 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14420 match(Set cr (CmpF src con));
14421
14422 ins_cost(100);
14423 format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14424 ins_encode %{
14425 __ vucomxss($src$$XMMRegister, $constantaddress($con));
14426 %}
14427 ins_pipe(pipe_slow);
14428 %}
14429
14430 // Really expensive, avoid
14431 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14432 %{
14433 match(Set cr (CmpD src1 src2));
14434
14435 ins_cost(500);
14436 format %{ "ucomisd $src1, $src2\n\t"
14437 "jnp,s exit\n\t"
14438 "pushfq\t# saw NaN, set CF\n\t"
14439 "andq [rsp], #0xffffff2b\n\t"
14440 "popfq\n"
14441 "exit:" %}
14442 ins_encode %{
14443 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14444 emit_cmpfp_fixup(masm);
14445 %}
14446 ins_pipe(pipe_slow);
14447 %}
14448
14449 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14450 match(Set cr (CmpD src1 src2));
14451
14452 ins_cost(100);
format %{ "ucomisd $src1, $src2" %}
14454 ins_encode %{
14455 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14456 %}
14457 ins_pipe(pipe_slow);
14458 %}
14459
14460 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14461 match(Set cr (CmpD src1 src2));
14462
14463 ins_cost(100);
format %{ "vucomxsd $src1, $src2" %}
14465 ins_encode %{
14466 __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14467 %}
14468 ins_pipe(pipe_slow);
14469 %}
14470
14471 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14472 match(Set cr (CmpD src1 (LoadD src2)));
14473
14474 ins_cost(100);
14475 format %{ "ucomisd $src1, $src2" %}
14476 ins_encode %{
14477 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14478 %}
14479 ins_pipe(pipe_slow);
14480 %}
14481
14482 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14483 match(Set cr (CmpD src1 (LoadD src2)));
14484
14485 ins_cost(100);
14486 format %{ "vucomxsd $src1, $src2" %}
14487 ins_encode %{
14488 __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14489 %}
14490 ins_pipe(pipe_slow);
14491 %}
14492
14493 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14494 match(Set cr (CmpD src con));
14495 ins_cost(100);
14496 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14497 ins_encode %{
14498 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14499 %}
14500 ins_pipe(pipe_slow);
14501 %}
14502
14503 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14504 match(Set cr (CmpD src con));
14505
14506 ins_cost(100);
14507 format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14508 ins_encode %{
14509 __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14510 %}
14511 ins_pipe(pipe_slow);
14512 %}
14513
14514 // Compare into -1,0,1
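// emit_cmpfp3 preloads -1 and keeps it for both the unordered (NaN) and
// the below cases; setne/movzbl then yield 0 for equal and 1 for greater.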
14515 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14516 %{
14517 match(Set dst (CmpF3 src1 src2));
14518 effect(KILL cr);
14519
14520 ins_cost(275);
14521 format %{ "ucomiss $src1, $src2\n\t"
14522 "movl $dst, #-1\n\t"
14523 "jp,s done\n\t"
14524 "jb,s done\n\t"
14525 "setne $dst\n\t"
14526 "movzbl $dst, $dst\n"
14527 "done:" %}
14528 ins_encode %{
14529 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14530 emit_cmpfp3(masm, $dst$$Register);
14531 %}
14532 ins_pipe(pipe_slow);
14533 %}
14534
14535 // Compare into -1,0,1
14536 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14537 %{
14538 match(Set dst (CmpF3 src1 (LoadF src2)));
14539 effect(KILL cr);
14540
14541 ins_cost(275);
14542 format %{ "ucomiss $src1, $src2\n\t"
14543 "movl $dst, #-1\n\t"
14544 "jp,s done\n\t"
14545 "jb,s done\n\t"
14546 "setne $dst\n\t"
14547 "movzbl $dst, $dst\n"
14548 "done:" %}
14549 ins_encode %{
14550 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14551 emit_cmpfp3(masm, $dst$$Register);
14552 %}
14553 ins_pipe(pipe_slow);
14554 %}
14555
14556 // Compare into -1,0,1
14557 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14558 match(Set dst (CmpF3 src con));
14559 effect(KILL cr);
14560
14561 ins_cost(275);
14562 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14563 "movl $dst, #-1\n\t"
14564 "jp,s done\n\t"
14565 "jb,s done\n\t"
14566 "setne $dst\n\t"
14567 "movzbl $dst, $dst\n"
14568 "done:" %}
14569 ins_encode %{
14570 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14571 emit_cmpfp3(masm, $dst$$Register);
14572 %}
14573 ins_pipe(pipe_slow);
14574 %}
14575
14576 // Compare into -1,0,1
14577 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14578 %{
14579 match(Set dst (CmpD3 src1 src2));
14580 effect(KILL cr);
14581
14582 ins_cost(275);
14583 format %{ "ucomisd $src1, $src2\n\t"
14584 "movl $dst, #-1\n\t"
14585 "jp,s done\n\t"
14586 "jb,s done\n\t"
14587 "setne $dst\n\t"
14588 "movzbl $dst, $dst\n"
14589 "done:" %}
14590 ins_encode %{
14591 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14592 emit_cmpfp3(masm, $dst$$Register);
14593 %}
14594 ins_pipe(pipe_slow);
14595 %}
14596
14597 // Compare into -1,0,1
14598 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14599 %{
14600 match(Set dst (CmpD3 src1 (LoadD src2)));
14601 effect(KILL cr);
14602
14603 ins_cost(275);
14604 format %{ "ucomisd $src1, $src2\n\t"
14605 "movl $dst, #-1\n\t"
14606 "jp,s done\n\t"
14607 "jb,s done\n\t"
14608 "setne $dst\n\t"
14609 "movzbl $dst, $dst\n"
14610 "done:" %}
14611 ins_encode %{
14612 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14613 emit_cmpfp3(masm, $dst$$Register);
14614 %}
14615 ins_pipe(pipe_slow);
14616 %}
14617
14618 // Compare into -1,0,1
14619 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14620 match(Set dst (CmpD3 src con));
14621 effect(KILL cr);
14622
14623 ins_cost(275);
14624 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14625 "movl $dst, #-1\n\t"
14626 "jp,s done\n\t"
14627 "jb,s done\n\t"
14628 "setne $dst\n\t"
14629 "movzbl $dst, $dst\n"
14630 "done:" %}
14631 ins_encode %{
14632 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14633 emit_cmpfp3(masm, $dst$$Register);
14634 %}
14635 ins_pipe(pipe_slow);
14636 %}
14637
14638 //----------Arithmetic Conversion Instructions---------------------------------
14639
14640 instruct convF2D_reg_reg(regD dst, regF src)
14641 %{
14642 match(Set dst (ConvF2D src));
14643
14644 format %{ "cvtss2sd $dst, $src" %}
14645 ins_encode %{
14646 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14647 %}
14648 ins_pipe(pipe_slow); // XXX
14649 %}
14650
14651 instruct convF2D_reg_mem(regD dst, memory src)
14652 %{
14653 predicate(UseAVX == 0);
14654 match(Set dst (ConvF2D (LoadF src)));
14655
14656 format %{ "cvtss2sd $dst, $src" %}
14657 ins_encode %{
14658 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14659 %}
14660 ins_pipe(pipe_slow); // XXX
14661 %}
14662
14663 instruct convD2F_reg_reg(regF dst, regD src)
14664 %{
14665 match(Set dst (ConvD2F src));
14666
14667 format %{ "cvtsd2ss $dst, $src" %}
14668 ins_encode %{
14669 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14670 %}
14671 ins_pipe(pipe_slow); // XXX
14672 %}
14673
14674 instruct convD2F_reg_mem(regF dst, memory src)
14675 %{
14676 predicate(UseAVX == 0);
14677 match(Set dst (ConvD2F (LoadD src)));
14678
14679 format %{ "cvtsd2ss $dst, $src" %}
14680 ins_encode %{
14681 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14682 %}
14683 ins_pipe(pipe_slow); // XXX
14684 %}
14685
14686 // XXX do mem variants
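// Without AVX10.2, cvttss2si/cvttsd2si return the integer-indefinite
// value on NaN or overflow, so convertF2I has to detect that result and
// repair it to match Java semantics; that is also why these rules kill cr.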
14687 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14688 %{
14689 predicate(!VM_Version::supports_avx10_2());
14690 match(Set dst (ConvF2I src));
14691 effect(KILL cr);
14692 format %{ "convert_f2i $dst, $src" %}
14693 ins_encode %{
14694 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14695 %}
14696 ins_pipe(pipe_slow);
14697 %}
14698
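// The AVX10.2 saturating converts should produce Java's expected results
// directly in hardware, so no fixup path (and hence no flags effect) is
// needed in the rules below.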
14699 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14700 %{
14701 predicate(VM_Version::supports_avx10_2());
14702 match(Set dst (ConvF2I src));
14703 format %{ "evcvttss2sisl $dst, $src" %}
14704 ins_encode %{
14705 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14706 %}
14707 ins_pipe(pipe_slow);
14708 %}
14709
14710 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14711 %{
14712 predicate(VM_Version::supports_avx10_2());
14713 match(Set dst (ConvF2I (LoadF src)));
14714 format %{ "evcvttss2sisl $dst, $src" %}
14715 ins_encode %{
14716 __ evcvttss2sisl($dst$$Register, $src$$Address);
14717 %}
14718 ins_pipe(pipe_slow);
14719 %}
14720
14721 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14722 %{
14723 predicate(!VM_Version::supports_avx10_2());
14724 match(Set dst (ConvF2L src));
14725 effect(KILL cr);
format %{ "convert_f2l $dst, $src" %}
14727 ins_encode %{
14728 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14729 %}
14730 ins_pipe(pipe_slow);
14731 %}
14732
14733 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14734 %{
14735 predicate(VM_Version::supports_avx10_2());
14736 match(Set dst (ConvF2L src));
14737 format %{ "evcvttss2sisq $dst, $src" %}
14738 ins_encode %{
14739 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14740 %}
14741 ins_pipe(pipe_slow);
14742 %}
14743
14744 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14745 %{
14746 predicate(VM_Version::supports_avx10_2());
14747 match(Set dst (ConvF2L (LoadF src)));
14748 format %{ "evcvttss2sisq $dst, $src" %}
14749 ins_encode %{
14750 __ evcvttss2sisq($dst$$Register, $src$$Address);
14751 %}
14752 ins_pipe(pipe_slow);
14753 %}
14754
14755 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14756 %{
14757 predicate(!VM_Version::supports_avx10_2());
14758 match(Set dst (ConvD2I src));
14759 effect(KILL cr);
format %{ "convert_d2i $dst, $src" %}
14761 ins_encode %{
14762 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14763 %}
14764 ins_pipe(pipe_slow);
14765 %}
14766
14767 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14768 %{
14769 predicate(VM_Version::supports_avx10_2());
14770 match(Set dst (ConvD2I src));
14771 format %{ "evcvttsd2sisl $dst, $src" %}
14772 ins_encode %{
14773 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14774 %}
14775 ins_pipe(pipe_slow);
14776 %}
14777
14778 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14779 %{
14780 predicate(VM_Version::supports_avx10_2());
14781 match(Set dst (ConvD2I (LoadD src)));
14782 format %{ "evcvttsd2sisl $dst, $src" %}
14783 ins_encode %{
14784 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14785 %}
14786 ins_pipe(pipe_slow);
14787 %}
14788
14789 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14790 %{
14791 predicate(!VM_Version::supports_avx10_2());
14792 match(Set dst (ConvD2L src));
14793 effect(KILL cr);
format %{ "convert_d2l $dst, $src" %}
14795 ins_encode %{
14796 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14797 %}
14798 ins_pipe(pipe_slow);
14799 %}
14800
14801 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14802 %{
14803 predicate(VM_Version::supports_avx10_2());
14804 match(Set dst (ConvD2L src));
14805 format %{ "evcvttsd2sisq $dst, $src" %}
14806 ins_encode %{
14807 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14808 %}
14809 ins_pipe(pipe_slow);
14810 %}
14811
14812 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14813 %{
14814 predicate(VM_Version::supports_avx10_2());
14815 match(Set dst (ConvD2L (LoadD src)));
14816 format %{ "evcvttsd2sisq $dst, $src" %}
14817 ins_encode %{
14818 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14819 %}
14820 ins_pipe(pipe_slow);
14821 %}
14822
14823 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14824 %{
14825 match(Set dst (RoundD src));
14826 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
format %{ "round_double $dst, $src\t# using $rtmp and $rcx as TEMP" %}
14828 ins_encode %{
14829 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14830 %}
14831 ins_pipe(pipe_slow);
14832 %}
14833
14834 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14835 %{
14836 match(Set dst (RoundF src));
14837 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14838 format %{ "round_float $dst,$src" %}
14839 ins_encode %{
14840 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14841 %}
14842 ins_pipe(pipe_slow);
14843 %}
14844
14845 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14846 %{
14847 predicate(!UseXmmI2F);
14848 match(Set dst (ConvI2F src));
14849
14850 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14851 ins_encode %{
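// cvtsi2ssl only writes the low element, leaving a false dependency on
// $dst's previous contents; on AVX the register is cleared first to break it.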
14852 if (UseAVX > 0) {
14853 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14854 }
14855 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14856 %}
14857 ins_pipe(pipe_slow); // XXX
14858 %}
14859
14860 instruct convI2F_reg_mem(regF dst, memory src)
14861 %{
14862 predicate(UseAVX == 0);
14863 match(Set dst (ConvI2F (LoadI src)));
14864
14865 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14866 ins_encode %{
14867 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14868 %}
14869 ins_pipe(pipe_slow); // XXX
14870 %}
14871
14872 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14873 %{
14874 predicate(!UseXmmI2D);
14875 match(Set dst (ConvI2D src));
14876
14877 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14878 ins_encode %{
14879 if (UseAVX > 0) {
14880 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14881 }
14882 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14883 %}
14884 ins_pipe(pipe_slow); // XXX
14885 %}
14886
14887 instruct convI2D_reg_mem(regD dst, memory src)
14888 %{
14889 predicate(UseAVX == 0);
14890 match(Set dst (ConvI2D (LoadI src)));
14891
14892 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14893 ins_encode %{
14894 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14895 %}
14896 ins_pipe(pipe_slow); // XXX
14897 %}
14898
14899 instruct convXI2F_reg(regF dst, rRegI src)
14900 %{
14901 predicate(UseXmmI2F);
14902 match(Set dst (ConvI2F src));
14903
14904 format %{ "movdl $dst, $src\n\t"
"cvtdq2ps $dst, $dst\t# i2f" %}
14906 ins_encode %{
14907 __ movdl($dst$$XMMRegister, $src$$Register);
14908 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14909 %}
14910 ins_pipe(pipe_slow); // XXX
14911 %}
14912
14913 instruct convXI2D_reg(regD dst, rRegI src)
14914 %{
14915 predicate(UseXmmI2D);
14916 match(Set dst (ConvI2D src));
14917
14918 format %{ "movdl $dst, $src\n\t"
"cvtdq2pd $dst, $dst\t# i2d" %}
14920 ins_encode %{
14921 __ movdl($dst$$XMMRegister, $src$$Register);
14922 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14923 %}
14924 ins_pipe(pipe_slow); // XXX
14925 %}
14926
14927 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14928 %{
14929 match(Set dst (ConvL2F src));
14930
14931 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14932 ins_encode %{
14933 if (UseAVX > 0) {
14934 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14935 }
14936 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14937 %}
14938 ins_pipe(pipe_slow); // XXX
14939 %}
14940
14941 instruct convL2F_reg_mem(regF dst, memory src)
14942 %{
14943 predicate(UseAVX == 0);
14944 match(Set dst (ConvL2F (LoadL src)));
14945
14946 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14947 ins_encode %{
14948 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14949 %}
14950 ins_pipe(pipe_slow); // XXX
14951 %}
14952
14953 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14954 %{
14955 match(Set dst (ConvL2D src));
14956
14957 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14958 ins_encode %{
14959 if (UseAVX > 0) {
14960 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14961 }
14962 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14963 %}
14964 ins_pipe(pipe_slow); // XXX
14965 %}
14966
14967 instruct convL2D_reg_mem(regD dst, memory src)
14968 %{
14969 predicate(UseAVX == 0);
14970 match(Set dst (ConvL2D (LoadL src)));
14971
14972 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14973 ins_encode %{
14974 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
14975 %}
14976 ins_pipe(pipe_slow); // XXX
14977 %}
14978
14979 instruct convI2L_reg_reg(rRegL dst, rRegI src)
14980 %{
14981 match(Set dst (ConvI2L src));
14982
14983 ins_cost(125);
14984 format %{ "movslq $dst, $src\t# i2l" %}
14985 ins_encode %{
14986 __ movslq($dst$$Register, $src$$Register);
14987 %}
14988 ins_pipe(ialu_reg_reg);
14989 %}
14990
14991 // Zero-extend convert int to long
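// A 32-bit mov implicitly clears bits 63:32, so the AndL with 0xFFFFFFFF
// reduces to a plain movl (elided when dst == src in the register form).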
14992 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
14993 %{
14994 match(Set dst (AndL (ConvI2L src) mask));
14995
format %{ "movl $dst, $src\t# i2l zero-extend" %}
14997 ins_encode %{
14998 if ($dst$$reg != $src$$reg) {
14999 __ movl($dst$$Register, $src$$Register);
15000 }
15001 %}
15002 ins_pipe(ialu_reg_reg);
15003 %}
15004
15005 // Zero-extend convert int to long
15006 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15007 %{
15008 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15009
format %{ "movl $dst, $src\t# i2l zero-extend" %}
15011 ins_encode %{
15012 __ movl($dst$$Register, $src$$Address);
15013 %}
15014 ins_pipe(ialu_reg_mem);
15015 %}
15016
15017 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15018 %{
15019 match(Set dst (AndL src mask));
15020
15021 format %{ "movl $dst, $src\t# zero-extend long" %}
15022 ins_encode %{
15023 __ movl($dst$$Register, $src$$Register);
15024 %}
15025 ins_pipe(ialu_reg_reg);
15026 %}
15027
15028 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15029 %{
15030 match(Set dst (ConvL2I src));
15031
15032 format %{ "movl $dst, $src\t# l2i" %}
15033 ins_encode %{
15034 __ movl($dst$$Register, $src$$Register);
15035 %}
15036 ins_pipe(ialu_reg_reg);
15037 %}
15038
15039
15040 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15041 match(Set dst (MoveF2I src));
15042 effect(DEF dst, USE src);
15043
15044 ins_cost(125);
15045 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15046 ins_encode %{
15047 __ movl($dst$$Register, Address(rsp, $src$$disp));
15048 %}
15049 ins_pipe(ialu_reg_mem);
15050 %}
15051
15052 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15053 match(Set dst (MoveI2F src));
15054 effect(DEF dst, USE src);
15055
15056 ins_cost(125);
15057 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15058 ins_encode %{
15059 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15060 %}
15061 ins_pipe(pipe_slow);
15062 %}
15063
15064 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15065 match(Set dst (MoveD2L src));
15066 effect(DEF dst, USE src);
15067
15068 ins_cost(125);
15069 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15070 ins_encode %{
15071 __ movq($dst$$Register, Address(rsp, $src$$disp));
15072 %}
15073 ins_pipe(ialu_reg_mem);
15074 %}
15075
15076 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15077 predicate(!UseXmmLoadAndClearUpper);
15078 match(Set dst (MoveL2D src));
15079 effect(DEF dst, USE src);
15080
15081 ins_cost(125);
15082 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15083 ins_encode %{
15084 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15085 %}
15086 ins_pipe(pipe_slow);
15087 %}
15088
15089 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15090 predicate(UseXmmLoadAndClearUpper);
15091 match(Set dst (MoveL2D src));
15092 effect(DEF dst, USE src);
15093
15094 ins_cost(125);
15095 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15096 ins_encode %{
15097 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15098 %}
15099 ins_pipe(pipe_slow);
15100 %}
15101
15102
15103 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15104 match(Set dst (MoveF2I src));
15105 effect(DEF dst, USE src);
15106
15107 ins_cost(95); // XXX
15108 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15109 ins_encode %{
15110 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15111 %}
15112 ins_pipe(pipe_slow);
15113 %}
15114
15115 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15116 match(Set dst (MoveI2F src));
15117 effect(DEF dst, USE src);
15118
15119 ins_cost(100);
15120 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15121 ins_encode %{
15122 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15123 %}
15124 ins_pipe( ialu_mem_reg );
15125 %}
15126
15127 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15128 match(Set dst (MoveD2L src));
15129 effect(DEF dst, USE src);
15130
15131 ins_cost(95); // XXX
format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
15133 ins_encode %{
15134 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15135 %}
15136 ins_pipe(pipe_slow);
15137 %}
15138
15139 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15140 match(Set dst (MoveL2D src));
15141 effect(DEF dst, USE src);
15142
15143 ins_cost(100);
15144 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15145 ins_encode %{
15146 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15147 %}
15148 ins_pipe(ialu_mem_reg);
15149 %}
15150
15151 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15152 match(Set dst (MoveF2I src));
15153 effect(DEF dst, USE src);
15154 ins_cost(85);
15155 format %{ "movd $dst,$src\t# MoveF2I" %}
15156 ins_encode %{
15157 __ movdl($dst$$Register, $src$$XMMRegister);
15158 %}
15159 ins_pipe( pipe_slow );
15160 %}
15161
15162 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15163 match(Set dst (MoveD2L src));
15164 effect(DEF dst, USE src);
15165 ins_cost(85);
15166 format %{ "movd $dst,$src\t# MoveD2L" %}
15167 ins_encode %{
15168 __ movdq($dst$$Register, $src$$XMMRegister);
15169 %}
15170 ins_pipe( pipe_slow );
15171 %}
15172
15173 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15174 match(Set dst (MoveI2F src));
15175 effect(DEF dst, USE src);
15176 ins_cost(100);
15177 format %{ "movd $dst,$src\t# MoveI2F" %}
15178 ins_encode %{
15179 __ movdl($dst$$XMMRegister, $src$$Register);
15180 %}
15181 ins_pipe( pipe_slow );
15182 %}
15183
15184 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15185 match(Set dst (MoveL2D src));
15186 effect(DEF dst, USE src);
15187 ins_cost(100);
15188 format %{ "movd $dst,$src\t# MoveL2D" %}
15189 ins_encode %{
15190 __ movdq($dst$$XMMRegister, $src$$Register);
15191 %}
15192 ins_pipe( pipe_slow );
15193 %}
15194
15195 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
15197 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15198 Universe dummy, rFlagsReg cr)
15199 %{
15200 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15201 match(Set dummy (ClearArray cnt base));
15202 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15203
15204 format %{ $$template
15205 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15206 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15207 $$emit$$"jg LARGE\n\t"
15208 $$emit$$"dec rcx\n\t"
15209 $$emit$$"js DONE\t# Zero length\n\t"
15210 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15211 $$emit$$"dec rcx\n\t"
15212 $$emit$$"jge LOOP\n\t"
15213 $$emit$$"jmp DONE\n\t"
15214 $$emit$$"# LARGE:\n\t"
15215 if (UseFastStosb) {
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15217 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15218 } else if (UseXMMForObjInit) {
15219 $$emit$$"mov rdi,rax\n\t"
15220 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15221 $$emit$$"jmpq L_zero_64_bytes\n\t"
15222 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15223 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15224 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15225 $$emit$$"add 0x40,rax\n\t"
15226 $$emit$$"# L_zero_64_bytes:\n\t"
15227 $$emit$$"sub 0x8,rcx\n\t"
15228 $$emit$$"jge L_loop\n\t"
15229 $$emit$$"add 0x4,rcx\n\t"
15230 $$emit$$"jl L_tail\n\t"
15231 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15232 $$emit$$"add 0x20,rax\n\t"
15233 $$emit$$"sub 0x4,rcx\n\t"
15234 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15235 $$emit$$"add 0x4,rcx\n\t"
15236 $$emit$$"jle L_end\n\t"
15237 $$emit$$"dec rcx\n\t"
15238 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15239 $$emit$$"vmovq xmm0,(rax)\n\t"
15240 $$emit$$"add 0x8,rax\n\t"
15241 $$emit$$"dec rcx\n\t"
15242 $$emit$$"jge L_sloop\n\t"
15243 $$emit$$"# L_end:\n\t"
15244 } else {
15245 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15246 }
15247 $$emit$$"# DONE"
15248 %}
15249 ins_encode %{
15250 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15251 $tmp$$XMMRegister, false, knoreg);
15252 %}
15253 ins_pipe(pipe_slow);
15254 %}
15255
15256 // Small non-constant length ClearArray for AVX512 targets.
15257 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15258 Universe dummy, rFlagsReg cr)
15259 %{
15260 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15261 match(Set dummy (ClearArray cnt base));
15262 ins_cost(125);
15263 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15264
15265 format %{ $$template
15266 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15267 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15268 $$emit$$"jg LARGE\n\t"
15269 $$emit$$"dec rcx\n\t"
15270 $$emit$$"js DONE\t# Zero length\n\t"
15271 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15272 $$emit$$"dec rcx\n\t"
15273 $$emit$$"jge LOOP\n\t"
15274 $$emit$$"jmp DONE\n\t"
15275 $$emit$$"# LARGE:\n\t"
15276 if (UseFastStosb) {
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15278 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15279 } else if (UseXMMForObjInit) {
15280 $$emit$$"mov rdi,rax\n\t"
15281 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15282 $$emit$$"jmpq L_zero_64_bytes\n\t"
15283 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15284 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15285 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15286 $$emit$$"add 0x40,rax\n\t"
15287 $$emit$$"# L_zero_64_bytes:\n\t"
15288 $$emit$$"sub 0x8,rcx\n\t"
15289 $$emit$$"jge L_loop\n\t"
15290 $$emit$$"add 0x4,rcx\n\t"
15291 $$emit$$"jl L_tail\n\t"
15292 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15293 $$emit$$"add 0x20,rax\n\t"
15294 $$emit$$"sub 0x4,rcx\n\t"
15295 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15296 $$emit$$"add 0x4,rcx\n\t"
15297 $$emit$$"jle L_end\n\t"
15298 $$emit$$"dec rcx\n\t"
15299 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15300 $$emit$$"vmovq xmm0,(rax)\n\t"
15301 $$emit$$"add 0x8,rax\n\t"
15302 $$emit$$"dec rcx\n\t"
15303 $$emit$$"jge L_sloop\n\t"
15304 $$emit$$"# L_end:\n\t"
15305 } else {
15306 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15307 }
15308 $$emit$$"# DONE"
15309 %}
15310 ins_encode %{
15311 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15312 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15313 %}
15314 ins_pipe(pipe_slow);
15315 %}
15316
15317 // Large non-constant length ClearArray for non-AVX512 targets.
15318 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15319 Universe dummy, rFlagsReg cr)
15320 %{
predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15322 match(Set dummy (ClearArray cnt base));
15323 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15324
15325 format %{ $$template
15326 if (UseFastStosb) {
15327 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15329 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15330 } else if (UseXMMForObjInit) {
15331 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15332 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15333 $$emit$$"jmpq L_zero_64_bytes\n\t"
15334 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15335 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15336 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15337 $$emit$$"add 0x40,rax\n\t"
15338 $$emit$$"# L_zero_64_bytes:\n\t"
15339 $$emit$$"sub 0x8,rcx\n\t"
15340 $$emit$$"jge L_loop\n\t"
15341 $$emit$$"add 0x4,rcx\n\t"
15342 $$emit$$"jl L_tail\n\t"
15343 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15344 $$emit$$"add 0x20,rax\n\t"
15345 $$emit$$"sub 0x4,rcx\n\t"
15346 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15347 $$emit$$"add 0x4,rcx\n\t"
15348 $$emit$$"jle L_end\n\t"
15349 $$emit$$"dec rcx\n\t"
15350 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15351 $$emit$$"vmovq xmm0,(rax)\n\t"
15352 $$emit$$"add 0x8,rax\n\t"
15353 $$emit$$"dec rcx\n\t"
15354 $$emit$$"jge L_sloop\n\t"
15355 $$emit$$"# L_end:\n\t"
15356 } else {
15357 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15358 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15359 }
15360 %}
15361 ins_encode %{
15362 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15363 $tmp$$XMMRegister, true, knoreg);
15364 %}
15365 ins_pipe(pipe_slow);
15366 %}
15367
15368 // Large non-constant length ClearArray for AVX512 targets.
15369 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15370 Universe dummy, rFlagsReg cr)
15371 %{
15372 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15373 match(Set dummy (ClearArray cnt base));
15374 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15375
15376 format %{ $$template
15377 if (UseFastStosb) {
15378 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"shlq rcx,3\t# Convert quadwords to bytes\n\t"
15380 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15381 } else if (UseXMMForObjInit) {
15382 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15383 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15384 $$emit$$"jmpq L_zero_64_bytes\n\t"
15385 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15386 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15387 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15388 $$emit$$"add 0x40,rax\n\t"
15389 $$emit$$"# L_zero_64_bytes:\n\t"
15390 $$emit$$"sub 0x8,rcx\n\t"
15391 $$emit$$"jge L_loop\n\t"
15392 $$emit$$"add 0x4,rcx\n\t"
15393 $$emit$$"jl L_tail\n\t"
15394 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15395 $$emit$$"add 0x20,rax\n\t"
15396 $$emit$$"sub 0x4,rcx\n\t"
15397 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15398 $$emit$$"add 0x4,rcx\n\t"
15399 $$emit$$"jle L_end\n\t"
15400 $$emit$$"dec rcx\n\t"
15401 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15402 $$emit$$"vmovq xmm0,(rax)\n\t"
15403 $$emit$$"add 0x8,rax\n\t"
15404 $$emit$$"dec rcx\n\t"
15405 $$emit$$"jge L_sloop\n\t"
15406 $$emit$$"# L_end:\n\t"
15407 } else {
15408 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15409 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15410 }
15411 %}
15412 ins_encode %{
15413 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15414 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15415 %}
15416 ins_pipe(pipe_slow);
15417 %}
15418
15419 // Small constant length ClearArray for AVX512 targets.
15420 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15421 %{
15422 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15423 match(Set dummy (ClearArray cnt base));
15424 ins_cost(100);
15425 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
format %{ "clear_mem_imm $base, $cnt" %}
15427 ins_encode %{
15428 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15429 %}
15430 ins_pipe(pipe_slow);
15431 %}
15432
15433 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15434 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15435 %{
15436 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15437 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15438 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15439
15440 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15441 ins_encode %{
15442 __ string_compare($str1$$Register, $str2$$Register,
15443 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15444 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15445 %}
15446 ins_pipe( pipe_slow );
15447 %}
15448
15449 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15450 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15451 %{
15452 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15453 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15454 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15455
15456 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15457 ins_encode %{
15458 __ string_compare($str1$$Register, $str2$$Register,
15459 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15460 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15461 %}
15462 ins_pipe( pipe_slow );
15463 %}
15464
15465 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15466 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15467 %{
15468 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15469 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15470 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15471
15472 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15473 ins_encode %{
15474 __ string_compare($str1$$Register, $str2$$Register,
15475 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15476 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15477 %}
15478 ins_pipe( pipe_slow );
15479 %}
15480
15481 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15482 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15483 %{
15484 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15485 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15486 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15487
15488 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15489 ins_encode %{
15490 __ string_compare($str1$$Register, $str2$$Register,
15491 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15492 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15493 %}
15494 ins_pipe( pipe_slow );
15495 %}
15496
15497 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15498 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15499 %{
15500 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15501 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15502 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15503
15504 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15505 ins_encode %{
15506 __ string_compare($str1$$Register, $str2$$Register,
15507 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15508 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15509 %}
15510 ins_pipe( pipe_slow );
15511 %}
15512
15513 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15514 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15515 %{
15516 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15517 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15518 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15519
15520 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15521 ins_encode %{
15522 __ string_compare($str1$$Register, $str2$$Register,
15523 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15524 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15525 %}
15526 ins_pipe( pipe_slow );
15527 %}
15528
15529 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15530 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15531 %{
15532 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15533 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15534 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15535
15536 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15537 ins_encode %{
15538 __ string_compare($str2$$Register, $str1$$Register,
15539 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15540 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15541 %}
15542 ins_pipe( pipe_slow );
15543 %}
15544
15545 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15546 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15547 %{
15548 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15549 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15550 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15551
15552 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15553 ins_encode %{
15554 __ string_compare($str2$$Register, $str1$$Register,
15555 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15556 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15557 %}
15558 ins_pipe( pipe_slow );
15559 %}
15560
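// All of the StrComp variants above implement the usual three-way compare;
// note that the UL forms pass their operands to string_compare() in swapped
// order. A minimal scalar sketch of the LL semantics (illustrative only;
// the stubs use vector loops, with the _evex forms threading a kReg mask):
//
//   int str_compare_LL(const uint8_t* s1, int n1, const uint8_t* s2, int n2) {
//     int n = n1 < n2 ? n1 : n2;
//     for (int i = 0; i < n; i++) {
//       if (s1[i] != s2[i]) return s1[i] - s2[i]; // first difference decides
//     }
//     return n1 - n2;                             // else the shorter sorts first
//   }
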
15561 // fast search of substring with known size.
15562 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15563 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15564 %{
15565 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15566 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15567 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15568
15569 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15570 ins_encode %{
15571 int icnt2 = (int)$int_cnt2$$constant;
15572 if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements,
      // which don't need to be loaded through the stack.
15575 __ string_indexofC8($str1$$Register, $str2$$Register,
15576 $cnt1$$Register, $cnt2$$Register,
15577 icnt2, $result$$Register,
15578 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15579 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15581 __ string_indexof($str1$$Register, $str2$$Register,
15582 $cnt1$$Register, $cnt2$$Register,
15583 icnt2, $result$$Register,
15584 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15585 }
15586 %}
15587 ins_pipe( pipe_slow );
15588 %}
15589
15590 // fast search of substring with known size.
15591 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15592 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15593 %{
15594 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15595 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15596 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15597
15598 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15599 ins_encode %{
15600 int icnt2 = (int)$int_cnt2$$constant;
15601 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
15604 __ string_indexofC8($str1$$Register, $str2$$Register,
15605 $cnt1$$Register, $cnt2$$Register,
15606 icnt2, $result$$Register,
15607 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15608 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15610 __ string_indexof($str1$$Register, $str2$$Register,
15611 $cnt1$$Register, $cnt2$$Register,
15612 icnt2, $result$$Register,
15613 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15614 }
15615 %}
15616 ins_pipe( pipe_slow );
15617 %}
15618
15619 // fast search of substring with known size.
15620 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15621 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15622 %{
15623 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15624 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15625 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15626
15627 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15628 ins_encode %{
15629 int icnt2 = (int)$int_cnt2$$constant;
15630 if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements,
      // which don't need to be loaded through the stack.
15633 __ string_indexofC8($str1$$Register, $str2$$Register,
15634 $cnt1$$Register, $cnt2$$Register,
15635 icnt2, $result$$Register,
15636 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15637 } else {
      // Small strings are loaded through the stack if they cross a page boundary.
15639 __ string_indexof($str1$$Register, $str2$$Register,
15640 $cnt1$$Register, $cnt2$$Register,
15641 icnt2, $result$$Register,
15642 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15643 }
15644 %}
15645 ins_pipe( pipe_slow );
15646 %}
15647
15648 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15649 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15650 %{
15651 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15652 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15653 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15654
15655 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15656 ins_encode %{
15657 __ string_indexof($str1$$Register, $str2$$Register,
15658 $cnt1$$Register, $cnt2$$Register,
15659 (-1), $result$$Register,
15660 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15661 %}
15662 ins_pipe( pipe_slow );
15663 %}
15664
15665 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15666 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15667 %{
15668 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15669 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15670 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15671
15672 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15673 ins_encode %{
15674 __ string_indexof($str1$$Register, $str2$$Register,
15675 $cnt1$$Register, $cnt2$$Register,
15676 (-1), $result$$Register,
15677 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15678 %}
15679 ins_pipe( pipe_slow );
15680 %}
15681
15682 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15683 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15684 %{
15685 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15686 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15687 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15688
15689 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15690 ins_encode %{
15691 __ string_indexof($str1$$Register, $str2$$Register,
15692 $cnt1$$Register, $cnt2$$Register,
15693 (-1), $result$$Register,
15694 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15695 %}
15696 ins_pipe( pipe_slow );
15697 %}
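
// Each StrIndexOf node returns the element index of the first occurrence of
// the needle in the haystack, or -1 if absent. A minimal scalar sketch of the
// UU semantics (illustrative only; the stubs use SSE4.2 pcmpestri loops):
//
//   int str_indexof_UU(const jchar* hay, int n, const jchar* ndl, int m) {
//     for (int i = 0; i + m <= n; i++) {
//       int j = 0;
//       while (j < m && hay[i + j] == ndl[j]) j++;
//       if (j == m) return i;                     // full match at offset i
//     }
//     return -1;                                  // not found
//   }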
15698
15699 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15700 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15701 %{
15702 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15703 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15704 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15705 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15706 ins_encode %{
15707 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15708 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15709 %}
15710 ins_pipe( pipe_slow );
15711 %}
15712
15713 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15714 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15715 %{
15716 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15717 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15718 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15719 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15720 ins_encode %{
15721 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15722 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15723 %}
15724 ins_pipe( pipe_slow );
15725 %}
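
// StrIndexOfChar returns the index of the first element equal to $ch, or -1.
// A scalar sketch of the Latin-1 case (illustrative only):
//
//   int indexof_char_L(const uint8_t* s, int n, int ch) {
//     for (int i = 0; i < n; i++) {
//       if (s[i] == ch) return i;
//     }
//     return -1;
//   }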
15726
15727 // fast string equals
15728 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15729 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15730 %{
15731 predicate(!VM_Version::supports_avx512vlbw());
15732 match(Set result (StrEquals (Binary str1 str2) cnt));
15733 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15734
15735 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15736 ins_encode %{
15737 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15738 $cnt$$Register, $result$$Register, $tmp3$$Register,
15739 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15740 %}
15741 ins_pipe( pipe_slow );
15742 %}
15743
15744 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15745 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15746 %{
15747 predicate(VM_Version::supports_avx512vlbw());
15748 match(Set result (StrEquals (Binary str1 str2) cnt));
15749 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15750
15751 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15752 ins_encode %{
15753 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15754 $cnt$$Register, $result$$Register, $tmp3$$Register,
15755 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15756 %}
15757 ins_pipe( pipe_slow );
15758 %}
15759
15760 // fast array equals
15761 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15762 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15763 %{
15764 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15765 match(Set result (AryEq ary1 ary2));
15766 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15767
15768 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15769 ins_encode %{
15770 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15771 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15772 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15773 %}
15774 ins_pipe( pipe_slow );
15775 %}
15776
15777 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15778 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15779 %{
15780 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15781 match(Set result (AryEq ary1 ary2));
15782 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15783
15784 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15785 ins_encode %{
15786 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15787 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15788 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15789 %}
15790 ins_pipe( pipe_slow );
15791 %}
15792
15793 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15794 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15795 %{
15796 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15797 match(Set result (AryEq ary1 ary2));
15798 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15799
15800 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15801 ins_encode %{
15802 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15803 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15804 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15805 %}
15806 ins_pipe( pipe_slow );
15807 %}
15808
15809 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15810 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15811 %{
15812 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15813 match(Set result (AryEq ary1 ary2));
15814 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15815
15816 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15817 ins_encode %{
15818 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15819 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15820 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15821 %}
15822 ins_pipe( pipe_slow );
15823 %}
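
// StrEquals and AryEq both reduce to a bulk payload compare; the leading
// 'true' above selects the array variant of arrays_equals(), which first
// checks the two array lengths. A scalar sketch of the byte case
// (illustrative only):
//
//   bool bytes_equal(const uint8_t* a, const uint8_t* b, int n) {
//     for (int i = 0; i < n; i++) {
//       if (a[i] != b[i]) return false;
//     }
//     return true;
//   }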
15824
15825 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15826 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15827 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15828 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15829 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15830 %{
15831 predicate(UseAVX >= 2);
15832 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15833 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15834 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15835 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15836 USE basic_type, KILL cr);
15837
15838 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15839 ins_encode %{
15840 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15841 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15842 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15843 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15844 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15845 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15846 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15847 %}
15848 ins_pipe( pipe_slow );
15849 %}
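
// VectorizedHashCode computes the standard Java polynomial hash, with the
// incoming $result as the initial value (note the Binary result basic_type
// input). A scalar sketch (illustrative only; the stub evaluates several
// 31^k partial sums across vector lanes):
//
//   int vectorized_hashcode(const jint* a, int n, int h /* initial */) {
//     for (int i = 0; i < n; i++) {
//       h = 31 * h + a[i];
//     }
//     return h;
//   }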
15850
15851 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15853 %{
15854 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15855 match(Set result (CountPositives ary1 len));
15856 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15857
15858 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15859 ins_encode %{
15860 __ count_positives($ary1$$Register, $len$$Register,
15861 $result$$Register, $tmp3$$Register,
15862 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15863 %}
15864 ins_pipe( pipe_slow );
15865 %}
15866
15867 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15869 %{
15870 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15871 match(Set result (CountPositives ary1 len));
15872 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15873
15874 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15875 ins_encode %{
15876 __ count_positives($ary1$$Register, $len$$Register,
15877 $result$$Register, $tmp3$$Register,
15878 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15879 %}
15880 ins_pipe( pipe_slow );
15881 %}
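
// CountPositives returns $len when the array holds no negative byte;
// otherwise it returns an index no greater than that of the first negative
// byte. A scalar sketch of the exact variant (illustrative only):
//
//   int count_positives(const int8_t* a, int len) {
//     for (int i = 0; i < len; i++) {
//       if (a[i] < 0) return i;  // first byte with the sign bit set
//     }
//     return len;                // all bytes non-negative
//   }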
15882
15883 // fast char[] to byte[] compression
15884 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15885 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15886 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15887 match(Set result (StrCompressedCopy src (Binary dst len)));
15888 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15889 USE_KILL len, KILL tmp5, KILL cr);
15890
15891 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15892 ins_encode %{
15893 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15894 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15895 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15896 knoreg, knoreg);
15897 %}
15898 ins_pipe( pipe_slow );
15899 %}
15900
15901 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15902 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15903 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15904 match(Set result (StrCompressedCopy src (Binary dst len)));
15905 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15906 USE_KILL len, KILL tmp5, KILL cr);
15907
15908 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15909 ins_encode %{
15910 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15911 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15912 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15913 $ktmp1$$KRegister, $ktmp2$$KRegister);
15914 %}
15915 ins_pipe( pipe_slow );
15916 %}
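
// StrCompressedCopy narrows UTF-16 chars to Latin-1 bytes; the result lets
// the caller detect whether the input was fully compressible. A scalar
// sketch (illustrative only; assumes the contract where the result is the
// number of chars successfully compressed):
//
//   int char_array_compress(const jchar* src, uint8_t* dst, int len) {
//     for (int i = 0; i < len; i++) {
//       if (src[i] > 0xFF) return i;  // first char that needs two bytes
//       dst[i] = (uint8_t)src[i];
//     }
//     return len;                     // fully compressed
//   }
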
15917 // fast byte[] to char[] inflation
15918 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15919 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15920 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15921 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15922 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15923
15924 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15925 ins_encode %{
15926 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15927 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15928 %}
15929 ins_pipe( pipe_slow );
15930 %}
15931
15932 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15933 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15934 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15935 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15936 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15937
15938 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15939 ins_encode %{
15940 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15941 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15942 %}
15943 ins_pipe( pipe_slow );
15944 %}
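
// StrInflatedCopy is the inverse transform: each Latin-1 byte is
// zero-extended to a UTF-16 char. It cannot fail, so the node has no result
// (hence the Universe dummy). A scalar sketch (illustrative only):
//
//   void byte_array_inflate(const uint8_t* src, jchar* dst, int len) {
//     for (int i = 0; i < len; i++) {
//       dst[i] = (jchar)src[i];       // zero-extend 8 -> 16 bits
//     }
//   }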
15945
15946 // encode char[] to byte[] in ISO_8859_1
15947 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15948 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15949 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15950 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15951 match(Set result (EncodeISOArray src (Binary dst len)));
15952 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15953
  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
15955 ins_encode %{
15956 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15957 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15958 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15959 %}
15960 ins_pipe( pipe_slow );
15961 %}
15962
15963 // encode char[] to byte[] in ASCII
15964 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15965 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15966 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15967 predicate(((EncodeISOArrayNode*)n)->is_ascii());
15968 match(Set result (EncodeISOArray src (Binary dst len)));
15969 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15970
  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI" %}
15972 ins_encode %{
15973 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15974 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15975 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15976 %}
15977 ins_pipe( pipe_slow );
15978 %}
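
// EncodeISOArray has the same shape as StrCompressedCopy but a parameterized
// range check: encoding stops at the first char above the target range
// (0xFF for ISO-8859-1, 0x7F for ASCII) and the result is the number of
// chars encoded. A scalar sketch (illustrative only):
//
//   int encode_iso_array(const jchar* src, uint8_t* dst, int len, bool ascii) {
//     const jchar limit = ascii ? 0x80 : 0x100;
//     for (int i = 0; i < len; i++) {
//       if (src[i] >= limit) return i; // first char out of range
//       dst[i] = (uint8_t)src[i];
//     }
//     return len;
//   }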
15979
15980 //----------Overflow Math Instructions-----------------------------------------
15981
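// These instructs produce only condition codes; C2 pairs the flags result
// with a branch or cmov on the overflow condition, e.g. when intrinsifying
// Math.addExact(int, int). A sketch of the equivalent scalar check
// (illustrative only):
//
//   bool add_overflows_int(int32_t a, int32_t b) {
//     int32_t sum;
//     return __builtin_add_overflow(a, b, &sum); // true iff signed overflow
//   }
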
15982 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15983 %{
15984 match(Set cr (OverflowAddI op1 op2));
15985 effect(DEF cr, USE_KILL op1, USE op2);
15986
15987 format %{ "addl $op1, $op2\t# overflow check int" %}
15988
15989 ins_encode %{
15990 __ addl($op1$$Register, $op2$$Register);
15991 %}
15992 ins_pipe(ialu_reg_reg);
15993 %}
15994
15995 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15996 %{
15997 match(Set cr (OverflowAddI op1 op2));
15998 effect(DEF cr, USE_KILL op1, USE op2);
15999
16000 format %{ "addl $op1, $op2\t# overflow check int" %}
16001
16002 ins_encode %{
16003 __ addl($op1$$Register, $op2$$constant);
16004 %}
16005 ins_pipe(ialu_reg_reg);
16006 %}
16007
16008 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16009 %{
16010 match(Set cr (OverflowAddL op1 op2));
16011 effect(DEF cr, USE_KILL op1, USE op2);
16012
16013 format %{ "addq $op1, $op2\t# overflow check long" %}
16014 ins_encode %{
16015 __ addq($op1$$Register, $op2$$Register);
16016 %}
16017 ins_pipe(ialu_reg_reg);
16018 %}
16019
16020 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16021 %{
16022 match(Set cr (OverflowAddL op1 op2));
16023 effect(DEF cr, USE_KILL op1, USE op2);
16024
16025 format %{ "addq $op1, $op2\t# overflow check long" %}
16026 ins_encode %{
16027 __ addq($op1$$Register, $op2$$constant);
16028 %}
16029 ins_pipe(ialu_reg_reg);
16030 %}
16031
16032 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16033 %{
16034 match(Set cr (OverflowSubI op1 op2));
16035
16036 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16037 ins_encode %{
16038 __ cmpl($op1$$Register, $op2$$Register);
16039 %}
16040 ins_pipe(ialu_reg_reg);
16041 %}
16042
16043 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16044 %{
16045 match(Set cr (OverflowSubI op1 op2));
16046
16047 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16048 ins_encode %{
16049 __ cmpl($op1$$Register, $op2$$constant);
16050 %}
16051 ins_pipe(ialu_reg_reg);
16052 %}
16053
16054 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16055 %{
16056 match(Set cr (OverflowSubL op1 op2));
16057
16058 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16059 ins_encode %{
16060 __ cmpq($op1$$Register, $op2$$Register);
16061 %}
16062 ins_pipe(ialu_reg_reg);
16063 %}
16064
16065 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16066 %{
16067 match(Set cr (OverflowSubL op1 op2));
16068
16069 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16070 ins_encode %{
16071 __ cmpq($op1$$Register, $op2$$constant);
16072 %}
16073 ins_pipe(ialu_reg_reg);
16074 %}
16075
16076 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16077 %{
16078 match(Set cr (OverflowSubI zero op2));
16079 effect(DEF cr, USE_KILL op2);
16080
16081 format %{ "negl $op2\t# overflow check int" %}
16082 ins_encode %{
16083 __ negl($op2$$Register);
16084 %}
16085 ins_pipe(ialu_reg_reg);
16086 %}
16087
16088 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16089 %{
16090 match(Set cr (OverflowSubL zero op2));
16091 effect(DEF cr, USE_KILL op2);
16092
16093 format %{ "negq $op2\t# overflow check long" %}
16094 ins_encode %{
16095 __ negq($op2$$Register);
16096 %}
16097 ins_pipe(ialu_reg_reg);
16098 %}
16099
16100 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16101 %{
16102 match(Set cr (OverflowMulI op1 op2));
16103 effect(DEF cr, USE_KILL op1, USE op2);
16104
16105 format %{ "imull $op1, $op2\t# overflow check int" %}
16106 ins_encode %{
16107 __ imull($op1$$Register, $op2$$Register);
16108 %}
16109 ins_pipe(ialu_reg_reg_alu0);
16110 %}
16111
16112 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16113 %{
16114 match(Set cr (OverflowMulI op1 op2));
16115 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16116
16117 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16118 ins_encode %{
16119 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16120 %}
16121 ins_pipe(ialu_reg_reg_alu0);
16122 %}
16123
16124 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16125 %{
16126 match(Set cr (OverflowMulL op1 op2));
16127 effect(DEF cr, USE_KILL op1, USE op2);
16128
16129 format %{ "imulq $op1, $op2\t# overflow check long" %}
16130 ins_encode %{
16131 __ imulq($op1$$Register, $op2$$Register);
16132 %}
16133 ins_pipe(ialu_reg_reg_alu0);
16134 %}
16135
16136 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16137 %{
16138 match(Set cr (OverflowMulL op1 op2));
16139 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16140
16141 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16142 ins_encode %{
16143 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16144 %}
16145 ins_pipe(ialu_reg_reg_alu0);
16146 %}
16147
16148
16149 //----------Control Flow Instructions------------------------------------------
16150 // Signed compare Instructions
16151
16152 // XXX more variants!!
16153 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16154 %{
16155 match(Set cr (CmpI op1 op2));
16156 effect(DEF cr, USE op1, USE op2);
16157
16158 format %{ "cmpl $op1, $op2" %}
16159 ins_encode %{
16160 __ cmpl($op1$$Register, $op2$$Register);
16161 %}
16162 ins_pipe(ialu_cr_reg_reg);
16163 %}
16164
16165 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16166 %{
16167 match(Set cr (CmpI op1 op2));
16168
16169 format %{ "cmpl $op1, $op2" %}
16170 ins_encode %{
16171 __ cmpl($op1$$Register, $op2$$constant);
16172 %}
16173 ins_pipe(ialu_cr_reg_imm);
16174 %}
16175
16176 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16177 %{
16178 match(Set cr (CmpI op1 (LoadI op2)));
16179
16180 ins_cost(500); // XXX
16181 format %{ "cmpl $op1, $op2" %}
16182 ins_encode %{
16183 __ cmpl($op1$$Register, $op2$$Address);
16184 %}
16185 ins_pipe(ialu_cr_reg_mem);
16186 %}
16187
16188 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16189 %{
16190 match(Set cr (CmpI src zero));
16191
16192 format %{ "testl $src, $src" %}
16193 ins_encode %{
16194 __ testl($src$$Register, $src$$Register);
16195 %}
16196 ins_pipe(ialu_cr_reg_imm);
16197 %}
16198
16199 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16200 %{
16201 match(Set cr (CmpI (AndI src con) zero));
16202
16203 format %{ "testl $src, $con" %}
16204 ins_encode %{
16205 __ testl($src$$Register, $con$$constant);
16206 %}
16207 ins_pipe(ialu_cr_reg_imm);
16208 %}
16209
16210 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16211 %{
16212 match(Set cr (CmpI (AndI src1 src2) zero));
16213
16214 format %{ "testl $src1, $src2" %}
16215 ins_encode %{
16216 __ testl($src1$$Register, $src2$$Register);
16217 %}
16218 ins_pipe(ialu_cr_reg_imm);
16219 %}
16220
16221 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16222 %{
16223 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16224
16225 format %{ "testl $src, $mem" %}
16226 ins_encode %{
16227 __ testl($src$$Register, $mem$$Address);
16228 %}
16229 ins_pipe(ialu_cr_reg_mem);
16230 %}
16231
16232 // Unsigned compare Instructions; really, same as signed except they
16233 // produce an rFlagsRegU instead of rFlagsReg.
16234 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16235 %{
16236 match(Set cr (CmpU op1 op2));
16237
16238 format %{ "cmpl $op1, $op2\t# unsigned" %}
16239 ins_encode %{
16240 __ cmpl($op1$$Register, $op2$$Register);
16241 %}
16242 ins_pipe(ialu_cr_reg_reg);
16243 %}
16244
16245 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16246 %{
16247 match(Set cr (CmpU op1 op2));
16248
16249 format %{ "cmpl $op1, $op2\t# unsigned" %}
16250 ins_encode %{
16251 __ cmpl($op1$$Register, $op2$$constant);
16252 %}
16253 ins_pipe(ialu_cr_reg_imm);
16254 %}
16255
16256 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16257 %{
16258 match(Set cr (CmpU op1 (LoadI op2)));
16259
16260 ins_cost(500); // XXX
16261 format %{ "cmpl $op1, $op2\t# unsigned" %}
16262 ins_encode %{
16263 __ cmpl($op1$$Register, $op2$$Address);
16264 %}
16265 ins_pipe(ialu_cr_reg_mem);
16266 %}
16267
16268 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16269 %{
16270 match(Set cr (CmpU src zero));
16271
16272 format %{ "testl $src, $src\t# unsigned" %}
16273 ins_encode %{
16274 __ testl($src$$Register, $src$$Register);
16275 %}
16276 ins_pipe(ialu_cr_reg_imm);
16277 %}
16278
16279 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16280 %{
16281 match(Set cr (CmpP op1 op2));
16282
16283 format %{ "cmpq $op1, $op2\t# ptr" %}
16284 ins_encode %{
16285 __ cmpq($op1$$Register, $op2$$Register);
16286 %}
16287 ins_pipe(ialu_cr_reg_reg);
16288 %}
16289
16290 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16291 %{
16292 match(Set cr (CmpP op1 (LoadP op2)));
16293 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16294
16295 ins_cost(500); // XXX
16296 format %{ "cmpq $op1, $op2\t# ptr" %}
16297 ins_encode %{
16298 __ cmpq($op1$$Register, $op2$$Address);
16299 %}
16300 ins_pipe(ialu_cr_reg_mem);
16301 %}
16302
16303 // XXX this is generalized by compP_rReg_mem???
16304 // Compare raw pointer (used in out-of-heap check).
16305 // Only works because non-oop pointers must be raw pointers
16306 // and raw pointers have no anti-dependencies.
16307 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16308 %{
16309 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16310 n->in(2)->as_Load()->barrier_data() == 0);
16311 match(Set cr (CmpP op1 (LoadP op2)));
16312
16313 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16314 ins_encode %{
16315 __ cmpq($op1$$Register, $op2$$Address);
16316 %}
16317 ins_pipe(ialu_cr_reg_mem);
16318 %}
16319
16320 // This will generate a signed flags result. This should be OK since
16321 // any compare to a zero should be eq/neq.
16322 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16323 %{
16324 match(Set cr (CmpP src zero));
16325
16326 format %{ "testq $src, $src\t# ptr" %}
16327 ins_encode %{
16328 __ testq($src$$Register, $src$$Register);
16329 %}
16330 ins_pipe(ialu_cr_reg_imm);
16331 %}
16332
16333 // This will generate a signed flags result. This should be OK since
16334 // any compare to a zero should be eq/neq.
16335 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16336 %{
16337 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16338 n->in(1)->as_Load()->barrier_data() == 0);
16339 match(Set cr (CmpP (LoadP op) zero));
16340
16341 ins_cost(500); // XXX
16342 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16343 ins_encode %{
16344 __ testq($op$$Address, 0xFFFFFFFF);
16345 %}
16346 ins_pipe(ialu_cr_reg_imm);
16347 %}
16348
16349 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16350 %{
16351 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16352 n->in(1)->as_Load()->barrier_data() == 0);
16353 match(Set cr (CmpP (LoadP mem) zero));
16354
16355 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16356 ins_encode %{
16357 __ cmpq(r12, $mem$$Address);
16358 %}
16359 ins_pipe(ialu_cr_reg_mem);
16360 %}
16361
16362 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16363 %{
16364 match(Set cr (CmpN op1 op2));
16365
16366 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16367 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16368 ins_pipe(ialu_cr_reg_reg);
16369 %}
16370
16371 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16372 %{
16373 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16374 match(Set cr (CmpN src (LoadN mem)));
16375
16376 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16377 ins_encode %{
16378 __ cmpl($src$$Register, $mem$$Address);
16379 %}
16380 ins_pipe(ialu_cr_reg_mem);
16381 %}
16382
16383 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16384 match(Set cr (CmpN op1 op2));
16385
16386 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16387 ins_encode %{
16388 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16389 %}
16390 ins_pipe(ialu_cr_reg_imm);
16391 %}
16392
16393 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16394 %{
16395 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16396 match(Set cr (CmpN src (LoadN mem)));
16397
16398 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16399 ins_encode %{
16400 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16401 %}
16402 ins_pipe(ialu_cr_reg_mem);
16403 %}
16404
16405 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16406 match(Set cr (CmpN op1 op2));
16407
16408 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16409 ins_encode %{
16410 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16411 %}
16412 ins_pipe(ialu_cr_reg_imm);
16413 %}
16414
16415 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16416 %{
16417 predicate(!UseCompactObjectHeaders);
16418 match(Set cr (CmpN src (LoadNKlass mem)));
16419
16420 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16421 ins_encode %{
16422 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16423 %}
16424 ins_pipe(ialu_cr_reg_mem);
16425 %}
16426
16427 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16428 match(Set cr (CmpN src zero));
16429
16430 format %{ "testl $src, $src\t# compressed ptr" %}
16431 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16432 ins_pipe(ialu_cr_reg_imm);
16433 %}
16434
16435 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16436 %{
16437 predicate(CompressedOops::base() != nullptr &&
16438 n->in(1)->as_Load()->barrier_data() == 0);
16439 match(Set cr (CmpN (LoadN mem) zero));
16440
16441 ins_cost(500); // XXX
16442 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16443 ins_encode %{
16444 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16445 %}
16446 ins_pipe(ialu_cr_reg_mem);
16447 %}
16448
16449 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16450 %{
16451 predicate(CompressedOops::base() == nullptr &&
16452 n->in(1)->as_Load()->barrier_data() == 0);
16453 match(Set cr (CmpN (LoadN mem) zero));
16454
16455 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16456 ins_encode %{
16457 __ cmpl(r12, $mem$$Address);
16458 %}
16459 ins_pipe(ialu_cr_reg_mem);
16460 %}
16461
16462 // Yanked all unsigned pointer compare operations.
16463 // Pointer compares are done with CmpP which is already unsigned.
16464
16465 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16466 %{
16467 match(Set cr (CmpL op1 op2));
16468
16469 format %{ "cmpq $op1, $op2" %}
16470 ins_encode %{
16471 __ cmpq($op1$$Register, $op2$$Register);
16472 %}
16473 ins_pipe(ialu_cr_reg_reg);
16474 %}
16475
16476 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16477 %{
16478 match(Set cr (CmpL op1 op2));
16479
16480 format %{ "cmpq $op1, $op2" %}
16481 ins_encode %{
16482 __ cmpq($op1$$Register, $op2$$constant);
16483 %}
16484 ins_pipe(ialu_cr_reg_imm);
16485 %}
16486
16487 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16488 %{
16489 match(Set cr (CmpL op1 (LoadL op2)));
16490
16491 format %{ "cmpq $op1, $op2" %}
16492 ins_encode %{
16493 __ cmpq($op1$$Register, $op2$$Address);
16494 %}
16495 ins_pipe(ialu_cr_reg_mem);
16496 %}
16497
16498 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16499 %{
16500 match(Set cr (CmpL src zero));
16501
16502 format %{ "testq $src, $src" %}
16503 ins_encode %{
16504 __ testq($src$$Register, $src$$Register);
16505 %}
16506 ins_pipe(ialu_cr_reg_imm);
16507 %}
16508
16509 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16510 %{
16511 match(Set cr (CmpL (AndL src con) zero));
16512
16513 format %{ "testq $src, $con\t# long" %}
16514 ins_encode %{
16515 __ testq($src$$Register, $con$$constant);
16516 %}
16517 ins_pipe(ialu_cr_reg_imm);
16518 %}
16519
16520 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16521 %{
16522 match(Set cr (CmpL (AndL src1 src2) zero));
16523
16524 format %{ "testq $src1, $src2\t# long" %}
16525 ins_encode %{
16526 __ testq($src1$$Register, $src2$$Register);
16527 %}
16528 ins_pipe(ialu_cr_reg_imm);
16529 %}
16530
16531 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16532 %{
16533 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16534
16535 format %{ "testq $src, $mem" %}
16536 ins_encode %{
16537 __ testq($src$$Register, $mem$$Address);
16538 %}
16539 ins_pipe(ialu_cr_reg_mem);
16540 %}
16541
16542 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16543 %{
16544 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16545
16546 format %{ "testq $src, $mem" %}
16547 ins_encode %{
16548 __ testq($src$$Register, $mem$$Address);
16549 %}
16550 ins_pipe(ialu_cr_reg_mem);
16551 %}
16552
16553 // Manifest a CmpU result in an integer register. Very painful.
16554 // This is the test to avoid.
16555 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16556 %{
16557 match(Set dst (CmpU3 src1 src2));
16558 effect(KILL flags);
16559
16560 ins_cost(275); // XXX
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl, or setzune for APX\n\t"
            "done:" %}
16566 ins_encode %{
16567 Label done;
16568 __ cmpl($src1$$Register, $src2$$Register);
16569 __ movl($dst$$Register, -1);
16570 __ jccb(Assembler::below, done);
16571 __ setcc(Assembler::notZero, $dst$$Register);
16572 __ bind(done);
16573 %}
16574 ins_pipe(pipe_slow);
16575 %}
16576
16577 // Manifest a CmpL result in an integer register. Very painful.
16578 // This is the test to avoid.
16579 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16580 %{
16581 match(Set dst (CmpL3 src1 src2));
16582 effect(KILL flags);
16583
16584 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst\t# emits setne + movzbl, or setzune for APX\n\t"
            "done:" %}
16590 ins_encode %{
16591 Label done;
16592 __ cmpq($src1$$Register, $src2$$Register);
16593 __ movl($dst$$Register, -1);
16594 __ jccb(Assembler::less, done);
16595 __ setcc(Assembler::notZero, $dst$$Register);
16596 __ bind(done);
16597 %}
16598 ins_pipe(pipe_slow);
16599 %}
16600
16601 // Manifest a CmpUL result in an integer register. Very painful.
16602 // This is the test to avoid.
16603 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16604 %{
16605 match(Set dst (CmpUL3 src1 src2));
16606 effect(KILL flags);
16607
16608 ins_cost(275); // XXX
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst\t# emits setne + movzbl, or setzune for APX\n\t"
            "done:" %}
16614 ins_encode %{
16615 Label done;
16616 __ cmpq($src1$$Register, $src2$$Register);
16617 __ movl($dst$$Register, -1);
16618 __ jccb(Assembler::below, done);
16619 __ setcc(Assembler::notZero, $dst$$Register);
16620 __ bind(done);
16621 %}
16622 ins_pipe(pipe_slow);
16623 %}
16624
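// All three Cmp*3 forms above manifest the same -1/0/1 result; only the
// operand width and signedness differ. A scalar sketch of the unsigned int
// case (illustrative only):
//
//   int cmpU3(uint32_t a, uint32_t b) {
//     if (a < b) return -1;   // the jb,u path keeps the preloaded -1
//     return a != b;          // setcc(notZero): 1 if greater, 0 if equal
//   }
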
16625 // Unsigned long compare Instructions; really, same as signed long except they
16626 // produce an rFlagsRegU instead of rFlagsReg.
16627 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16628 %{
16629 match(Set cr (CmpUL op1 op2));
16630
16631 format %{ "cmpq $op1, $op2\t# unsigned" %}
16632 ins_encode %{
16633 __ cmpq($op1$$Register, $op2$$Register);
16634 %}
16635 ins_pipe(ialu_cr_reg_reg);
16636 %}
16637
16638 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16639 %{
16640 match(Set cr (CmpUL op1 op2));
16641
16642 format %{ "cmpq $op1, $op2\t# unsigned" %}
16643 ins_encode %{
16644 __ cmpq($op1$$Register, $op2$$constant);
16645 %}
16646 ins_pipe(ialu_cr_reg_imm);
16647 %}
16648
16649 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16650 %{
16651 match(Set cr (CmpUL op1 (LoadL op2)));
16652
16653 format %{ "cmpq $op1, $op2\t# unsigned" %}
16654 ins_encode %{
16655 __ cmpq($op1$$Register, $op2$$Address);
16656 %}
16657 ins_pipe(ialu_cr_reg_mem);
16658 %}
16659
16660 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16661 %{
16662 match(Set cr (CmpUL src zero));
16663
16664 format %{ "testq $src, $src\t# unsigned" %}
16665 ins_encode %{
16666 __ testq($src$$Register, $src$$Register);
16667 %}
16668 ins_pipe(ialu_cr_reg_imm);
16669 %}
16670
16671 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16672 %{
16673 match(Set cr (CmpI (LoadB mem) imm));
16674
16675 ins_cost(125);
16676 format %{ "cmpb $mem, $imm" %}
16677 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16678 ins_pipe(ialu_cr_reg_mem);
16679 %}
16680
16681 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16682 %{
16683 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16684
16685 ins_cost(125);
16686 format %{ "testb $mem, $imm\t# ubyte" %}
16687 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16688 ins_pipe(ialu_cr_reg_mem);
16689 %}
16690
16691 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16692 %{
16693 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16694
16695 ins_cost(125);
16696 format %{ "testb $mem, $imm\t# byte" %}
16697 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16698 ins_pipe(ialu_cr_reg_mem);
16699 %}
16700
16701 //----------Max and Min--------------------------------------------------------
16702 // Min Instructions
16703
16704 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16705 %{
16706 predicate(!UseAPX);
16707 effect(USE_DEF dst, USE src, USE cr);
16708
16709 format %{ "cmovlgt $dst, $src\t# min" %}
16710 ins_encode %{
16711 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16712 %}
16713 ins_pipe(pipe_cmov_reg);
16714 %}
16715
16716 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16717 %{
16718 predicate(UseAPX);
16719 effect(DEF dst, USE src1, USE src2, USE cr);
16720
16721 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16722 ins_encode %{
16723 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16724 %}
16725 ins_pipe(pipe_cmov_reg);
16726 %}
16727
16728 instruct minI_rReg(rRegI dst, rRegI src)
16729 %{
16730 predicate(!UseAPX);
16731 match(Set dst (MinI dst src));
16732
16733 ins_cost(200);
16734 expand %{
16735 rFlagsReg cr;
16736 compI_rReg(cr, dst, src);
16737 cmovI_reg_g(dst, src, cr);
16738 %}
16739 %}
16740
16741 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16742 %{
16743 predicate(UseAPX);
16744 match(Set dst (MinI src1 src2));
16745 effect(DEF dst, USE src1, USE src2);
16746 flag(PD::Flag_ndd_demotable_opr1);
16747
16748 ins_cost(200);
16749 expand %{
16750 rFlagsReg cr;
16751 compI_rReg(cr, src1, src2);
16752 cmovI_reg_g_ndd(dst, src1, src2, cr);
16753 %}
16754 %}
16755
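// MinI/MaxI expand into a compare plus a conditional move, avoiding a branch.
// For MinI, the cmovlgt above replaces dst with src exactly when dst > src.
// A scalar sketch (illustrative only):
//
//   int min_via_cmov(int dst, int src) {
//     if (dst > src) dst = src;  // becomes cmpl + cmovgt
//     return dst;
//   }
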
16756 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16757 %{
16758 predicate(!UseAPX);
16759 effect(USE_DEF dst, USE src, USE cr);
16760
16761 format %{ "cmovllt $dst, $src\t# max" %}
16762 ins_encode %{
16763 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16764 %}
16765 ins_pipe(pipe_cmov_reg);
16766 %}
16767
16768 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16769 %{
16770 predicate(UseAPX);
16771 effect(DEF dst, USE src1, USE src2, USE cr);
16772
16773 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16774 ins_encode %{
16775 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16776 %}
16777 ins_pipe(pipe_cmov_reg);
16778 %}
16779
16780 instruct maxI_rReg(rRegI dst, rRegI src)
16781 %{
16782 predicate(!UseAPX);
16783 match(Set dst (MaxI dst src));
16784
16785 ins_cost(200);
16786 expand %{
16787 rFlagsReg cr;
16788 compI_rReg(cr, dst, src);
16789 cmovI_reg_l(dst, src, cr);
16790 %}
16791 %}
16792
16793 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16794 %{
16795 predicate(UseAPX);
16796 match(Set dst (MaxI src1 src2));
16797 effect(DEF dst, USE src1, USE src2);
16798 flag(PD::Flag_ndd_demotable_opr1);
16799
16800 ins_cost(200);
16801 expand %{
16802 rFlagsReg cr;
16803 compI_rReg(cr, src1, src2);
16804 cmovI_reg_l_ndd(dst, src1, src2, cr);
16805 %}
16806 %}
16807
16808 // ============================================================================
16809 // Branch Instructions
16810
16811 // Jump Direct - Label defines a relative address from JMP+1
16812 instruct jmpDir(label labl)
16813 %{
16814 match(Goto);
16815 effect(USE labl);
16816
16817 ins_cost(300);
16818 format %{ "jmp $labl" %}
16819 size(5);
16820 ins_encode %{
16821 Label* L = $labl$$label;
16822 __ jmp(*L, false); // Always long jump
16823 %}
16824 ins_pipe(pipe_jmp);
16825 %}
16826
16827 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16828 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16829 %{
16830 match(If cop cr);
16831 effect(USE labl);
16832
16833 ins_cost(300);
16834 format %{ "j$cop $labl" %}
16835 size(6);
16836 ins_encode %{
16837 Label* L = $labl$$label;
16838 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16839 %}
16840 ins_pipe(pipe_jcc);
16841 %}
16842
16843 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16844 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16845 %{
16846 match(CountedLoopEnd cop cr);
16847 effect(USE labl);
16848
16849 ins_cost(300);
16850 format %{ "j$cop $labl\t# loop end" %}
16851 size(6);
16852 ins_encode %{
16853 Label* L = $labl$$label;
16854 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16855 %}
16856 ins_pipe(pipe_jcc);
16857 %}
16858
16859 // Jump Direct Conditional - using unsigned comparison
16860 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16861 match(If cop cmp);
16862 effect(USE labl);
16863
16864 ins_cost(300);
16865 format %{ "j$cop,u $labl" %}
16866 size(6);
16867 ins_encode %{
16868 Label* L = $labl$$label;
16869 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16870 %}
16871 ins_pipe(pipe_jcc);
16872 %}
16873
16874 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16875 match(If cop cmp);
16876 effect(USE labl);
16877
16878 ins_cost(200);
16879 format %{ "j$cop,u $labl" %}
16880 size(6);
16881 ins_encode %{
16882 Label* L = $labl$$label;
16883 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16884 %}
16885 ins_pipe(pipe_jcc);
16886 %}
16887
16888 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16889 match(If cop cmp);
16890 effect(USE labl);
16891
16892 ins_cost(200);
16893 format %{ $$template
16894 if ($cop$$cmpcode == Assembler::notEqual) {
16895 $$emit$$"jp,u $labl\n\t"
16896 $$emit$$"j$cop,u $labl"
16897 } else {
16898 $$emit$$"jp,u done\n\t"
16899 $$emit$$"j$cop,u $labl\n\t"
16900 $$emit$$"done:"
16901 }
16902 %}
16903 ins_encode %{
16904 Label* l = $labl$$label;
16905 if ($cop$$cmpcode == Assembler::notEqual) {
16906 __ jcc(Assembler::parity, *l, false);
16907 __ jcc(Assembler::notEqual, *l, false);
16908 } else if ($cop$$cmpcode == Assembler::equal) {
16909 Label done;
16910 __ jccb(Assembler::parity, done);
16911 __ jcc(Assembler::equal, *l, false);
16912 __ bind(done);
16913 } else {
16914 ShouldNotReachHere();
16915 }
16916 %}
16917 ins_pipe(pipe_jcc);
16918 %}
16919
16920 // Jump Direct Conditional - using signed and unsigned comparison
16921 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16922 match(If cop cmp);
16923 effect(USE labl);
16924
16925 ins_cost(200);
16926 format %{ "j$cop,su $labl" %}
16927 size(6);
16928 ins_encode %{
16929 Label* L = $labl$$label;
16930 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16931 %}
16932 ins_pipe(pipe_jcc);
16933 %}
16934
16935 // ============================================================================
// The second, slow half of a subtype check. Scan the subklass's secondary
// superklass array for an instance of the superklass. Set a hidden
// internal cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()). Return NZ for a miss or zero for a hit. The
// encoding ALSO sets flags.
16941
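// A scalar sketch of the scan that the repne scasq sequence performs
// (illustrative only; the real code also updates the one-element
// secondary_super_cache on a hit):
//
//   bool scan_secondary_supers(Klass* const* s, int len, Klass* super) {
//     for (int i = 0; i < len; i++) {
//       if (s[i] == super) return true;  // hit: $result ends up zero
//     }
//     return false;                      // miss: $result stays non-zero
//   }
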
16942 instruct partialSubtypeCheck(rdi_RegP result,
16943 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16944 rFlagsReg cr)
16945 %{
16946 match(Set result (PartialSubtypeCheck sub super));
16947 predicate(!UseSecondarySupersTable);
16948 effect(KILL rcx, KILL cr);
16949
16950 ins_cost(1100); // slightly larger than the next version
16951 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16952 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16953 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16954 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16955 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16956 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16957 "xorq $result, $result\t\t Hit: rdi zero\n\t"
16958 "miss:\t" %}
16959
16960 ins_encode %{
16961 Label miss;
16962 // NB: Callers may assume that, when $result is a valid register,
16963 // check_klass_subtype_slow_path_linear sets it to a nonzero
16964 // value.
16965 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16966 $rcx$$Register, $result$$Register,
16967 nullptr, &miss,
16968 /*set_cond_codes:*/ true);
16969 __ xorptr($result$$Register, $result$$Register);
16970 __ bind(miss);
16971 %}
16972
16973 ins_pipe(pipe_slow);
16974 %}
16975
16976 // ============================================================================
16977 // Two versions of hashtable-based partialSubtypeCheck, both used when
16978 // we need to search for a super class in the secondary supers array.
16979 // The first is used when we don't know _a priori_ the class being
16980 // searched for. The second, far more common, is used when we do know:
16981 // this is used for instanceof, checkcast, and any case where C2 can
16982 // determine it by constant propagation.
16983
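// For example, checkcast/instanceof against a type known at compile time
// ("x instanceof Foo") matches partialSubtypeCheckConstSuper below, while a
// check against a non-constant class (e.g. a Class.isInstance() call whose
// class argument C2 cannot constant-fold) falls back to the Var form.
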
16984 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
16985 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
16986 rFlagsReg cr)
16987 %{
16988 match(Set result (PartialSubtypeCheck sub super));
16989 predicate(UseSecondarySupersTable);
16990 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
16991
16992 ins_cost(1000);
16993 format %{ "partialSubtypeCheck $result, $sub, $super" %}
16994
16995 ins_encode %{
16996 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
16997 $temp3$$Register, $temp4$$Register, $result$$Register);
16998 %}
16999
17000 ins_pipe(pipe_slow);
17001 %}
17002
17003 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17004 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17005 rFlagsReg cr)
17006 %{
17007 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17008 predicate(UseSecondarySupersTable);
17009 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17010
17011 ins_cost(700); // smaller than the next version
17012 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17013
17014 ins_encode %{
17015 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17016 if (InlineSecondarySupersTest) {
17017 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17018 $temp3$$Register, $temp4$$Register, $result$$Register,
17019 super_klass_slot);
17020 } else {
17021 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17022 }
17023 %}
17024
17025 ins_pipe(pipe_slow);
17026 %}
17027
17028 // ============================================================================
17029 // Branch Instructions -- short offset versions
17030 //
17031 // These instructions are used to replace jumps of a long offset (the default
17032 // match) with jumps of a shorter offset. These instructions are all tagged
17033 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17034 // match rules in general matching. Instead, the ADLC generates a conversion
17035 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler determines whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
17039
17040 // Jump Direct - Label defines a relative address from JMP+1
17041 instruct jmpDir_short(label labl) %{
17042 match(Goto);
17043 effect(USE labl);
17044
17045 ins_cost(300);
17046 format %{ "jmp,s $labl" %}
17047 size(2);
17048 ins_encode %{
17049 Label* L = $labl$$label;
17050 __ jmpb(*L);
17051 %}
17052 ins_pipe(pipe_jmp);
17053 ins_short_branch(1);
17054 %}
17055
17056 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17057 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17058 match(If cop cr);
17059 effect(USE labl);
17060
17061 ins_cost(300);
17062 format %{ "j$cop,s $labl" %}
17063 size(2);
17064 ins_encode %{
17065 Label* L = $labl$$label;
17066 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17067 %}
17068 ins_pipe(pipe_jcc);
17069 ins_short_branch(1);
17070 %}
17071
17072 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17073 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17074 match(CountedLoopEnd cop cr);
17075 effect(USE labl);
17076
17077 ins_cost(300);
17078 format %{ "j$cop,s $labl\t# loop end" %}
17079 size(2);
17080 ins_encode %{
17081 Label* L = $labl$$label;
17082 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17083 %}
17084 ins_pipe(pipe_jcc);
17085 ins_short_branch(1);
17086 %}
17087
17088 // Jump Direct Conditional - using unsigned comparison
17089 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17090 match(If cop cmp);
17091 effect(USE labl);
17092
17093 ins_cost(300);
17094 format %{ "j$cop,us $labl" %}
17095 size(2);
17096 ins_encode %{
17097 Label* L = $labl$$label;
17098 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17099 %}
17100 ins_pipe(pipe_jcc);
17101 ins_short_branch(1);
17102 %}
17103
17104 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17105 match(If cop cmp);
17106 effect(USE labl);
17107
17108 ins_cost(300);
17109 format %{ "j$cop,us $labl" %}
17110 size(2);
17111 ins_encode %{
17112 Label* L = $labl$$label;
17113 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17114 %}
17115 ins_pipe(pipe_jcc);
17116 ins_short_branch(1);
17117 %}
17118
17119 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17120 match(If cop cmp);
17121 effect(USE labl);
17122
17123 ins_cost(300);
17124 format %{ $$template
17125 if ($cop$$cmpcode == Assembler::notEqual) {
17126 $$emit$$"jp,u,s $labl\n\t"
17127 $$emit$$"j$cop,u,s $labl"
17128 } else {
17129 $$emit$$"jp,u,s done\n\t"
17130 $$emit$$"j$cop,u,s $labl\n\t"
17131 $$emit$$"done:"
17132 }
17133 %}
17134 size(4);
17135 ins_encode %{
17136 Label* l = $labl$$label;
17137 if ($cop$$cmpcode == Assembler::notEqual) {
17138 __ jccb(Assembler::parity, *l);
17139 __ jccb(Assembler::notEqual, *l);
17140 } else if ($cop$$cmpcode == Assembler::equal) {
17141 Label done;
17142 __ jccb(Assembler::parity, done);
17143 __ jccb(Assembler::equal, *l);
17144 __ bind(done);
17145 } else {
17146 ShouldNotReachHere();
17147 }
17148 %}
17149 ins_pipe(pipe_jcc);
17150 ins_short_branch(1);
17151 %}
17152
17153 // Jump Direct Conditional - using signed and unsigned comparison
17154 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17155 match(If cop cmp);
17156 effect(USE labl);
17157
17158 ins_cost(300);
17159 format %{ "j$cop,sus $labl" %}
17160 size(2);
17161 ins_encode %{
17162 Label* L = $labl$$label;
17163 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17164 %}
17165 ins_pipe(pipe_jcc);
17166 ins_short_branch(1);
17167 %}
17168
17169 // ============================================================================
17170 // inlined locking and unlocking
17171
17172 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17173 match(Set cr (FastLock object box));
17174 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17175 ins_cost(300);
17176 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17177 ins_encode %{
17178 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17179 %}
17180 ins_pipe(pipe_slow);
17181 %}
17182
17183 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17184 match(Set cr (FastUnlock object rax_reg));
17185 effect(TEMP tmp, USE_KILL rax_reg);
17186 ins_cost(300);
17187 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17188 ins_encode %{
17189 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17190 %}
17191 ins_pipe(pipe_slow);
17192 %}
17193
17194
17195 // ============================================================================
17196 // Safepoint Instructions
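//
// The poll is a dummy load from the thread-local polling page: when the VM
// arms the page, the load faults and the signal handler dispatches to the
// safepoint handler, so the fast path needs no explicit compare or branch.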
17197 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17198 %{
17199 match(SafePoint poll);
17200 effect(KILL cr, USE poll);
17201
17202 format %{ "testl rax, [$poll]\t"
17203 "# Safepoint: poll for GC" %}
17204 ins_cost(125);
17205 ins_encode %{
17206 __ relocate(relocInfo::poll_type);
17207 address pre_pc = __ pc();
17208 __ testl(rax, Address($poll$$Register, 0));
17209 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17210 %}
17211 ins_pipe(ialu_reg_mem);
17212 %}
17213
17214 instruct mask_all_evexL(kReg dst, rRegL src) %{
17215 match(Set dst (MaskAll src));
17216 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17217 ins_encode %{
17218 int mask_len = Matcher::vector_length(this);
17219 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17220 %}
17221 ins_pipe( pipe_slow );
17222 %}
17223
17224 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17225 predicate(Matcher::vector_length(n) > 32);
17226 match(Set dst (MaskAll src));
17227 effect(TEMP tmp);
17228 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17229 ins_encode %{
17230 int mask_len = Matcher::vector_length(this);
17231 __ movslq($tmp$$Register, $src$$Register);
17232 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17233 %}
17234 ins_pipe( pipe_slow );
17235 %}
17236
17237 // ============================================================================
17238 // Procedure Call/Return Instructions
17239 // Call Java Static Instruction
17240 // Note: If this code changes, the corresponding ret_addr_offset() and
17241 // compute_padding() functions will have to be adjusted.
17242 instruct CallStaticJavaDirect(method meth) %{
17243 match(CallStaticJava);
17244 effect(USE meth);
17245
17246 ins_cost(300);
17247 format %{ "call,static " %}
17248 opcode(0xE8); /* E8 cd */
17249 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17250 ins_pipe(pipe_slow);
17251 ins_alignment(4);
17252 %}
17253
17254 // Call Java Dynamic Instruction
17255 // Note: If this code changes, the corresponding ret_addr_offset() and
17256 // compute_padding() functions will have to be adjusted.
17257 instruct CallDynamicJavaDirect(method meth)
17258 %{
17259 match(CallDynamicJava);
17260 effect(USE meth);
17261
17262 ins_cost(300);
17263 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17264 "call,dynamic " %}
17265 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17266 ins_pipe(pipe_slow);
17267 ins_alignment(4);
17268 %}
17269
17270 // Call Runtime Instruction
17271 instruct CallRuntimeDirect(method meth)
17272 %{
17273 match(CallRuntime);
17274 effect(USE meth);
17275
17276 ins_cost(300);
17277 format %{ "call,runtime " %}
17278 ins_encode(clear_avx, Java_To_Runtime(meth));
17279 ins_pipe(pipe_slow);
17280 %}
17281
17282 // Call runtime without safepoint
17283 instruct CallLeafDirect(method meth)
17284 %{
17285 match(CallLeaf);
17286 effect(USE meth);
17287
17288 ins_cost(300);
17289 format %{ "call_leaf,runtime " %}
17290 ins_encode(clear_avx, Java_To_Runtime(meth));
17291 ins_pipe(pipe_slow);
17292 %}
17293
17294 // Call runtime without safepoint and with vector arguments
17295 instruct CallLeafDirectVector(method meth)
17296 %{
17297 match(CallLeafVector);
17298 effect(USE meth);
17299
17300 ins_cost(300);
17301 format %{ "call_leaf,vector " %}
17302 ins_encode(Java_To_Runtime(meth));
17303 ins_pipe(pipe_slow);
17304 %}
17305
17306 // Call runtime without safepoint
17307 instruct CallLeafNoFPDirect(method meth)
17308 %{
17309 match(CallLeafNoFP);
17310 effect(USE meth);
17311
17312 ins_cost(300);
17313 format %{ "call_leaf_nofp,runtime " %}
17314 ins_encode(clear_avx, Java_To_Runtime(meth));
17315 ins_pipe(pipe_slow);
17316 %}
17317
17318 // Return Instruction
17319 // Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching.
17322 instruct Ret()
17323 %{
17324 match(Return);
17325
17326 format %{ "ret" %}
17327 ins_encode %{
17328 __ ret(0);
17329 %}
17330 ins_pipe(pipe_jmp);
17331 %}
17332
17333 // Tail Call; Jump from runtime stub to Java code.
17334 // Also known as an 'interprocedural jump'.
17335 // Target of jump will eventually return to caller.
17336 // TailJump below removes the return address.
17337 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17338 // emitted just above the TailCall which has reset rbp to the caller state.
17339 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17340 %{
17341 match(TailCall jump_target method_ptr);
17342
17343 ins_cost(300);
17344 format %{ "jmp $jump_target\t# rbx holds method" %}
17345 ins_encode %{
17346 __ jmp($jump_target$$Register);
17347 %}
17348 ins_pipe(pipe_jmp);
17349 %}
17350
17351 // Tail Jump; remove the return address; jump to target.
17352 // TailCall above leaves the return address around.
17353 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17354 %{
17355 match(TailJump jump_target ex_oop);
17356
17357 ins_cost(300);
17358 format %{ "popq rdx\t# pop return address\n\t"
17359 "jmp $jump_target" %}
17360 ins_encode %{
17361 __ popq(as_Register(RDX_enc));
17362 __ jmp($jump_target$$Register);
17363 %}
17364 ins_pipe(pipe_jmp);
17365 %}
17366
17367 // Forward exception.
17368 instruct ForwardExceptionjmp()
17369 %{
17370 match(ForwardException);
17371
17372 format %{ "jmp forward_exception_stub" %}
17373 ins_encode %{
17374 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17375 %}
17376 ins_pipe(pipe_jmp);
17377 %}
17378
17379 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
// just prior to jumping to this handler. No code emitted.
17382 instruct CreateException(rax_RegP ex_oop)
17383 %{
17384 match(Set ex_oop (CreateEx));
17385
17386 size(0);
17387 // use the following format syntax
17388 format %{ "# exception oop is in rax; no code emitted" %}
17389 ins_encode();
17390 ins_pipe(empty);
17391 %}
17392
17393 // Rethrow exception:
17394 // The exception oop will come in the first argument position.
17395 // Then JUMP (not call) to the rethrow stub code.
17396 instruct RethrowException()
17397 %{
17398 match(Rethrow);
17399
17400 // use the following format syntax
17401 format %{ "jmp rethrow_stub" %}
17402 ins_encode %{
17403 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17404 %}
17405 ins_pipe(pipe_jmp);
17406 %}
17407
17408 // ============================================================================
17409 // This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
17412 instruct tlsLoadP(r15_RegP dst) %{
17413 match(Set dst (ThreadLocal));
17414 effect(DEF dst);
17415
17416 size(0);
17417 format %{ "# TLS is in R15" %}
17418 ins_encode( /*empty encoding*/ );
17419 ins_pipe(ialu_reg_reg);
17420 %}
17421
17422 instruct addF_reg(regF dst, regF src) %{
17423 predicate(UseAVX == 0);
17424 match(Set dst (AddF dst src));
17425
17426 format %{ "addss $dst, $src" %}
17427 ins_cost(150);
17428 ins_encode %{
17429 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17430 %}
17431 ins_pipe(pipe_slow);
17432 %}
17433
17434 instruct addF_mem(regF dst, memory src) %{
17435 predicate(UseAVX == 0);
17436 match(Set dst (AddF dst (LoadF src)));
17437
17438 format %{ "addss $dst, $src" %}
17439 ins_cost(150);
17440 ins_encode %{
17441 __ addss($dst$$XMMRegister, $src$$Address);
17442 %}
17443 ins_pipe(pipe_slow);
17444 %}
17445
17446 instruct addF_imm(regF dst, immF con) %{
17447 predicate(UseAVX == 0);
17448 match(Set dst (AddF dst con));
17449 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17450 ins_cost(150);
17451 ins_encode %{
17452 __ addss($dst$$XMMRegister, $constantaddress($con));
17453 %}
17454 ins_pipe(pipe_slow);
17455 %}
17456
17457 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17458 predicate(UseAVX > 0);
17459 match(Set dst (AddF src1 src2));
17460
17461 format %{ "vaddss $dst, $src1, $src2" %}
17462 ins_cost(150);
17463 ins_encode %{
17464 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17465 %}
17466 ins_pipe(pipe_slow);
17467 %}
17468
17469 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17470 predicate(UseAVX > 0);
17471 match(Set dst (AddF src1 (LoadF src2)));
17472
17473 format %{ "vaddss $dst, $src1, $src2" %}
17474 ins_cost(150);
17475 ins_encode %{
17476 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17477 %}
17478 ins_pipe(pipe_slow);
17479 %}
17480
17481 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17482 predicate(UseAVX > 0);
17483 match(Set dst (AddF src con));
17484
17485 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17486 ins_cost(150);
17487 ins_encode %{
17488 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17489 %}
17490 ins_pipe(pipe_slow);
17491 %}
17492
17493 instruct addD_reg(regD dst, regD src) %{
17494 predicate(UseAVX == 0);
17495 match(Set dst (AddD dst src));
17496
17497 format %{ "addsd $dst, $src" %}
17498 ins_cost(150);
17499 ins_encode %{
17500 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17501 %}
17502 ins_pipe(pipe_slow);
17503 %}
17504
17505 instruct addD_mem(regD dst, memory src) %{
17506 predicate(UseAVX == 0);
17507 match(Set dst (AddD dst (LoadD src)));
17508
17509 format %{ "addsd $dst, $src" %}
17510 ins_cost(150);
17511 ins_encode %{
17512 __ addsd($dst$$XMMRegister, $src$$Address);
17513 %}
17514 ins_pipe(pipe_slow);
17515 %}
17516
17517 instruct addD_imm(regD dst, immD con) %{
17518 predicate(UseAVX == 0);
17519 match(Set dst (AddD dst con));
17520 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17521 ins_cost(150);
17522 ins_encode %{
17523 __ addsd($dst$$XMMRegister, $constantaddress($con));
17524 %}
17525 ins_pipe(pipe_slow);
17526 %}
17527
17528 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17529 predicate(UseAVX > 0);
17530 match(Set dst (AddD src1 src2));
17531
17532 format %{ "vaddsd $dst, $src1, $src2" %}
17533 ins_cost(150);
17534 ins_encode %{
17535 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17536 %}
17537 ins_pipe(pipe_slow);
17538 %}
17539
17540 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17541 predicate(UseAVX > 0);
17542 match(Set dst (AddD src1 (LoadD src2)));
17543
17544 format %{ "vaddsd $dst, $src1, $src2" %}
17545 ins_cost(150);
17546 ins_encode %{
17547 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17548 %}
17549 ins_pipe(pipe_slow);
17550 %}
17551
17552 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17553 predicate(UseAVX > 0);
17554 match(Set dst (AddD src con));
17555
17556 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17557 ins_cost(150);
17558 ins_encode %{
17559 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17560 %}
17561 ins_pipe(pipe_slow);
17562 %}
17563
17564 instruct subF_reg(regF dst, regF src) %{
17565 predicate(UseAVX == 0);
17566 match(Set dst (SubF dst src));
17567
17568 format %{ "subss $dst, $src" %}
17569 ins_cost(150);
17570 ins_encode %{
17571 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17572 %}
17573 ins_pipe(pipe_slow);
17574 %}
17575
17576 instruct subF_mem(regF dst, memory src) %{
17577 predicate(UseAVX == 0);
17578 match(Set dst (SubF dst (LoadF src)));
17579
17580 format %{ "subss $dst, $src" %}
17581 ins_cost(150);
17582 ins_encode %{
17583 __ subss($dst$$XMMRegister, $src$$Address);
17584 %}
17585 ins_pipe(pipe_slow);
17586 %}
17587
17588 instruct subF_imm(regF dst, immF con) %{
17589 predicate(UseAVX == 0);
17590 match(Set dst (SubF dst con));
17591 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17592 ins_cost(150);
17593 ins_encode %{
17594 __ subss($dst$$XMMRegister, $constantaddress($con));
17595 %}
17596 ins_pipe(pipe_slow);
17597 %}
17598
17599 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17600 predicate(UseAVX > 0);
17601 match(Set dst (SubF src1 src2));
17602
17603 format %{ "vsubss $dst, $src1, $src2" %}
17604 ins_cost(150);
17605 ins_encode %{
17606 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17607 %}
17608 ins_pipe(pipe_slow);
17609 %}
17610
17611 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17612 predicate(UseAVX > 0);
17613 match(Set dst (SubF src1 (LoadF src2)));
17614
17615 format %{ "vsubss $dst, $src1, $src2" %}
17616 ins_cost(150);
17617 ins_encode %{
17618 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17619 %}
17620 ins_pipe(pipe_slow);
17621 %}
17622
17623 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17624 predicate(UseAVX > 0);
17625 match(Set dst (SubF src con));
17626
17627 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17628 ins_cost(150);
17629 ins_encode %{
17630 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17631 %}
17632 ins_pipe(pipe_slow);
17633 %}
17634
17635 instruct subD_reg(regD dst, regD src) %{
17636 predicate(UseAVX == 0);
17637 match(Set dst (SubD dst src));
17638
17639 format %{ "subsd $dst, $src" %}
17640 ins_cost(150);
17641 ins_encode %{
17642 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17643 %}
17644 ins_pipe(pipe_slow);
17645 %}
17646
17647 instruct subD_mem(regD dst, memory src) %{
17648 predicate(UseAVX == 0);
17649 match(Set dst (SubD dst (LoadD src)));
17650
17651 format %{ "subsd $dst, $src" %}
17652 ins_cost(150);
17653 ins_encode %{
17654 __ subsd($dst$$XMMRegister, $src$$Address);
17655 %}
17656 ins_pipe(pipe_slow);
17657 %}
17658
17659 instruct subD_imm(regD dst, immD con) %{
17660 predicate(UseAVX == 0);
17661 match(Set dst (SubD dst con));
17662 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17663 ins_cost(150);
17664 ins_encode %{
17665 __ subsd($dst$$XMMRegister, $constantaddress($con));
17666 %}
17667 ins_pipe(pipe_slow);
17668 %}
17669
17670 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17671 predicate(UseAVX > 0);
17672 match(Set dst (SubD src1 src2));
17673
17674 format %{ "vsubsd $dst, $src1, $src2" %}
17675 ins_cost(150);
17676 ins_encode %{
17677 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17678 %}
17679 ins_pipe(pipe_slow);
17680 %}
17681
17682 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17683 predicate(UseAVX > 0);
17684 match(Set dst (SubD src1 (LoadD src2)));
17685
17686 format %{ "vsubsd $dst, $src1, $src2" %}
17687 ins_cost(150);
17688 ins_encode %{
17689 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17690 %}
17691 ins_pipe(pipe_slow);
17692 %}
17693
17694 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17695 predicate(UseAVX > 0);
17696 match(Set dst (SubD src con));
17697
17698 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17699 ins_cost(150);
17700 ins_encode %{
17701 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17702 %}
17703 ins_pipe(pipe_slow);
17704 %}
17705
17706 instruct mulF_reg(regF dst, regF src) %{
17707 predicate(UseAVX == 0);
17708 match(Set dst (MulF dst src));
17709
17710 format %{ "mulss $dst, $src" %}
17711 ins_cost(150);
17712 ins_encode %{
17713 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17714 %}
17715 ins_pipe(pipe_slow);
17716 %}
17717
17718 instruct mulF_mem(regF dst, memory src) %{
17719 predicate(UseAVX == 0);
17720 match(Set dst (MulF dst (LoadF src)));
17721
17722 format %{ "mulss $dst, $src" %}
17723 ins_cost(150);
17724 ins_encode %{
17725 __ mulss($dst$$XMMRegister, $src$$Address);
17726 %}
17727 ins_pipe(pipe_slow);
17728 %}
17729
17730 instruct mulF_imm(regF dst, immF con) %{
17731 predicate(UseAVX == 0);
17732 match(Set dst (MulF dst con));
17733 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17734 ins_cost(150);
17735 ins_encode %{
17736 __ mulss($dst$$XMMRegister, $constantaddress($con));
17737 %}
17738 ins_pipe(pipe_slow);
17739 %}
17740
17741 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17742 predicate(UseAVX > 0);
17743 match(Set dst (MulF src1 src2));
17744
17745 format %{ "vmulss $dst, $src1, $src2" %}
17746 ins_cost(150);
17747 ins_encode %{
17748 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17749 %}
17750 ins_pipe(pipe_slow);
17751 %}
17752
17753 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17754 predicate(UseAVX > 0);
17755 match(Set dst (MulF src1 (LoadF src2)));
17756
17757 format %{ "vmulss $dst, $src1, $src2" %}
17758 ins_cost(150);
17759 ins_encode %{
17760 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17761 %}
17762 ins_pipe(pipe_slow);
17763 %}
17764
17765 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17766 predicate(UseAVX > 0);
17767 match(Set dst (MulF src con));
17768
17769 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17770 ins_cost(150);
17771 ins_encode %{
17772 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17773 %}
17774 ins_pipe(pipe_slow);
17775 %}
17776
17777 instruct mulD_reg(regD dst, regD src) %{
17778 predicate(UseAVX == 0);
17779 match(Set dst (MulD dst src));
17780
17781 format %{ "mulsd $dst, $src" %}
17782 ins_cost(150);
17783 ins_encode %{
17784 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17785 %}
17786 ins_pipe(pipe_slow);
17787 %}
17788
17789 instruct mulD_mem(regD dst, memory src) %{
17790 predicate(UseAVX == 0);
17791 match(Set dst (MulD dst (LoadD src)));
17792
17793 format %{ "mulsd $dst, $src" %}
17794 ins_cost(150);
17795 ins_encode %{
17796 __ mulsd($dst$$XMMRegister, $src$$Address);
17797 %}
17798 ins_pipe(pipe_slow);
17799 %}
17800
17801 instruct mulD_imm(regD dst, immD con) %{
17802 predicate(UseAVX == 0);
17803 match(Set dst (MulD dst con));
17804 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17805 ins_cost(150);
17806 ins_encode %{
17807 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17808 %}
17809 ins_pipe(pipe_slow);
17810 %}
17811
17812 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17813 predicate(UseAVX > 0);
17814 match(Set dst (MulD src1 src2));
17815
17816 format %{ "vmulsd $dst, $src1, $src2" %}
17817 ins_cost(150);
17818 ins_encode %{
17819 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17820 %}
17821 ins_pipe(pipe_slow);
17822 %}
17823
17824 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17825 predicate(UseAVX > 0);
17826 match(Set dst (MulD src1 (LoadD src2)));
17827
17828 format %{ "vmulsd $dst, $src1, $src2" %}
17829 ins_cost(150);
17830 ins_encode %{
17831 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17832 %}
17833 ins_pipe(pipe_slow);
17834 %}
17835
17836 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17837 predicate(UseAVX > 0);
17838 match(Set dst (MulD src con));
17839
17840 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17841 ins_cost(150);
17842 ins_encode %{
17843 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17844 %}
17845 ins_pipe(pipe_slow);
17846 %}
17847
17848 instruct divF_reg(regF dst, regF src) %{
17849 predicate(UseAVX == 0);
17850 match(Set dst (DivF dst src));
17851
17852 format %{ "divss $dst, $src" %}
17853 ins_cost(150);
17854 ins_encode %{
17855 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17856 %}
17857 ins_pipe(pipe_slow);
17858 %}
17859
17860 instruct divF_mem(regF dst, memory src) %{
17861 predicate(UseAVX == 0);
17862 match(Set dst (DivF dst (LoadF src)));
17863
17864 format %{ "divss $dst, $src" %}
17865 ins_cost(150);
17866 ins_encode %{
17867 __ divss($dst$$XMMRegister, $src$$Address);
17868 %}
17869 ins_pipe(pipe_slow);
17870 %}
17871
17872 instruct divF_imm(regF dst, immF con) %{
17873 predicate(UseAVX == 0);
17874 match(Set dst (DivF dst con));
17875 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17876 ins_cost(150);
17877 ins_encode %{
17878 __ divss($dst$$XMMRegister, $constantaddress($con));
17879 %}
17880 ins_pipe(pipe_slow);
17881 %}
17882
17883 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17884 predicate(UseAVX > 0);
17885 match(Set dst (DivF src1 src2));
17886
17887 format %{ "vdivss $dst, $src1, $src2" %}
17888 ins_cost(150);
17889 ins_encode %{
17890 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17891 %}
17892 ins_pipe(pipe_slow);
17893 %}
17894
17895 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17896 predicate(UseAVX > 0);
17897 match(Set dst (DivF src1 (LoadF src2)));
17898
17899 format %{ "vdivss $dst, $src1, $src2" %}
17900 ins_cost(150);
17901 ins_encode %{
17902 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17903 %}
17904 ins_pipe(pipe_slow);
17905 %}
17906
17907 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17908 predicate(UseAVX > 0);
17909 match(Set dst (DivF src con));
17910
17911 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17912 ins_cost(150);
17913 ins_encode %{
17914 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17915 %}
17916 ins_pipe(pipe_slow);
17917 %}
17918
17919 instruct divD_reg(regD dst, regD src) %{
17920 predicate(UseAVX == 0);
17921 match(Set dst (DivD dst src));
17922
17923 format %{ "divsd $dst, $src" %}
17924 ins_cost(150);
17925 ins_encode %{
17926 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17927 %}
17928 ins_pipe(pipe_slow);
17929 %}
17930
17931 instruct divD_mem(regD dst, memory src) %{
17932 predicate(UseAVX == 0);
17933 match(Set dst (DivD dst (LoadD src)));
17934
17935 format %{ "divsd $dst, $src" %}
17936 ins_cost(150);
17937 ins_encode %{
17938 __ divsd($dst$$XMMRegister, $src$$Address);
17939 %}
17940 ins_pipe(pipe_slow);
17941 %}
17942
17943 instruct divD_imm(regD dst, immD con) %{
17944 predicate(UseAVX == 0);
17945 match(Set dst (DivD dst con));
17946 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17947 ins_cost(150);
17948 ins_encode %{
17949 __ divsd($dst$$XMMRegister, $constantaddress($con));
17950 %}
17951 ins_pipe(pipe_slow);
17952 %}
17953
17954 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17955 predicate(UseAVX > 0);
17956 match(Set dst (DivD src1 src2));
17957
17958 format %{ "vdivsd $dst, $src1, $src2" %}
17959 ins_cost(150);
17960 ins_encode %{
17961 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17962 %}
17963 ins_pipe(pipe_slow);
17964 %}
17965
17966 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
17967 predicate(UseAVX > 0);
17968 match(Set dst (DivD src1 (LoadD src2)));
17969
17970 format %{ "vdivsd $dst, $src1, $src2" %}
17971 ins_cost(150);
17972 ins_encode %{
17973 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17974 %}
17975 ins_pipe(pipe_slow);
17976 %}
17977
17978 instruct divD_reg_imm(regD dst, regD src, immD con) %{
17979 predicate(UseAVX > 0);
17980 match(Set dst (DivD src con));
17981
17982 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17983 ins_cost(150);
17984 ins_encode %{
17985 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17986 %}
17987 ins_pipe(pipe_slow);
17988 %}
17989
17990 instruct absF_reg(regF dst) %{
17991 predicate(UseAVX == 0);
17992 match(Set dst (AbsF dst));
17993 ins_cost(150);
17994 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
17995 ins_encode %{
17996 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17997 %}
17998 ins_pipe(pipe_slow);
17999 %}
18000
18001 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18002 predicate(UseAVX > 0);
18003 match(Set dst (AbsF src));
18004 ins_cost(150);
18005 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18006 ins_encode %{
18007 int vlen_enc = Assembler::AVX_128bit;
18008 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18009 ExternalAddress(float_signmask()), vlen_enc);
18010 %}
18011 ins_pipe(pipe_slow);
18012 %}
18013
18014 instruct absD_reg(regD dst) %{
18015 predicate(UseAVX == 0);
18016 match(Set dst (AbsD dst));
18017 ins_cost(150);
18018 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18019 "# abs double by sign masking" %}
18020 ins_encode %{
18021 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18022 %}
18023 ins_pipe(pipe_slow);
18024 %}
18025
18026 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18027 predicate(UseAVX > 0);
18028 match(Set dst (AbsD src));
18029 ins_cost(150);
18030 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18031 "# abs double by sign masking" %}
18032 ins_encode %{
18033 int vlen_enc = Assembler::AVX_128bit;
18034 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18035 ExternalAddress(double_signmask()), vlen_enc);
18036 %}
18037 ins_pipe(pipe_slow);
18038 %}
18039
18040 instruct negF_reg(regF dst) %{
18041 predicate(UseAVX == 0);
18042 match(Set dst (NegF dst));
18043 ins_cost(150);
18044 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18045 ins_encode %{
18046 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18047 %}
18048 ins_pipe(pipe_slow);
18049 %}
18050
18051 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18052 predicate(UseAVX > 0);
18053 match(Set dst (NegF src));
18054 ins_cost(150);
18055 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18056 ins_encode %{
18057 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18058 ExternalAddress(float_signflip()));
18059 %}
18060 ins_pipe(pipe_slow);
18061 %}
18062
18063 instruct negD_reg(regD dst) %{
18064 predicate(UseAVX == 0);
18065 match(Set dst (NegD dst));
18066 ins_cost(150);
18067 format %{ "xorpd $dst, [0x8000000000000000]\t"
18068 "# neg double by sign flipping" %}
18069 ins_encode %{
18070 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18071 %}
18072 ins_pipe(pipe_slow);
18073 %}
18074
18075 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18076 predicate(UseAVX > 0);
18077 match(Set dst (NegD src));
18078 ins_cost(150);
18079 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18080 "# neg double by sign flipping" %}
18081 ins_encode %{
18082 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18083 ExternalAddress(double_signflip()));
18084 %}
18085 ins_pipe(pipe_slow);
18086 %}
18087
// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance. Therefore, only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
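// (sqrtss/sqrtsd write only the low element of dst, so the untouched upper
// bits would otherwise carry a false dependency on whatever last wrote the
// register; with the input pre-loaded the instruction reads only its own
// output register.)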
18090 instruct sqrtF_reg(regF dst) %{
18091 match(Set dst (SqrtF dst));
18092 format %{ "sqrtss $dst, $dst" %}
18093 ins_encode %{
18094 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18095 %}
18096 ins_pipe(pipe_slow);
18097 %}
18098
// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance. Therefore, only the instruct rule where the input is
// pre-loaded into the dst register is defined below.
18101 instruct sqrtD_reg(regD dst) %{
18102 match(Set dst (SqrtD dst));
18103 format %{ "sqrtsd $dst, $dst" %}
18104 ins_encode %{
18105 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18106 %}
18107 ins_pipe(pipe_slow);
18108 %}
18109
18110 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18111 effect(TEMP tmp);
18112 match(Set dst (ConvF2HF src));
18113 ins_cost(125);
18114 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
18115 ins_encode %{
18116 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18117 %}
18118 ins_pipe( pipe_slow );
18119 %}
18120
18121 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18122 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18123 effect(TEMP ktmp, TEMP rtmp);
18124 match(Set mem (StoreC mem (ConvF2HF src)));
18125 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18126 ins_encode %{
18127 __ movl($rtmp$$Register, 0x1);
18128 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18129 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18130 %}
18131 ins_pipe( pipe_slow );
18132 %}
18133
18134 instruct vconvF2HF(vec dst, vec src) %{
18135 match(Set dst (VectorCastF2HF src));
18136 format %{ "vector_conv_F2HF $dst $src" %}
18137 ins_encode %{
18138 int vlen_enc = vector_length_encoding(this, $src);
18139 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18140 %}
18141 ins_pipe( pipe_slow );
18142 %}
18143
18144 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18145 predicate(n->as_StoreVector()->memory_size() >= 16);
18146 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18147 format %{ "vcvtps2ph $mem,$src" %}
18148 ins_encode %{
18149 int vlen_enc = vector_length_encoding(this, $src);
18150 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18151 %}
18152 ins_pipe( pipe_slow );
18153 %}
18154
18155 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18156 match(Set dst (ConvHF2F src));
18157 format %{ "vcvtph2ps $dst,$src" %}
18158 ins_encode %{
18159 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18160 %}
18161 ins_pipe( pipe_slow );
18162 %}
18163
18164 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18165 match(Set dst (VectorCastHF2F (LoadVector mem)));
18166 format %{ "vcvtph2ps $dst,$mem" %}
18167 ins_encode %{
18168 int vlen_enc = vector_length_encoding(this);
18169 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18170 %}
18171 ins_pipe( pipe_slow );
18172 %}
18173
18174 instruct vconvHF2F(vec dst, vec src) %{
18175 match(Set dst (VectorCastHF2F src));
18176 ins_cost(125);
18177 format %{ "vector_conv_HF2F $dst,$src" %}
18178 ins_encode %{
18179 int vlen_enc = vector_length_encoding(this);
18180 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18181 %}
18182 ins_pipe( pipe_slow );
18183 %}
18184
18185 // ---------------------------------------- VectorReinterpret ------------------------------------
18186 instruct reinterpret_mask(kReg dst) %{
18187 predicate(n->bottom_type()->isa_vectmask() &&
18188 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18189 match(Set dst (VectorReinterpret dst));
18190 ins_cost(125);
18191 format %{ "vector_reinterpret $dst\t!" %}
18192 ins_encode %{
18193 // empty
18194 %}
18195 ins_pipe( pipe_slow );
18196 %}
18197
18198 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18199 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18200 n->bottom_type()->isa_vectmask() &&
18201 n->in(1)->bottom_type()->isa_vectmask() &&
18202 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18204 match(Set dst (VectorReinterpret src));
18205 effect(TEMP xtmp);
18206 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18207 ins_encode %{
18208 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18209 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18210 assert(src_sz == dst_sz , "src and dst size mismatch");
18211 int vlen_enc = vector_length_encoding(src_sz);
18212 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18213 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18214 %}
18215 ins_pipe( pipe_slow );
18216 %}
18217
18218 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18219 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18220 n->bottom_type()->isa_vectmask() &&
18221 n->in(1)->bottom_type()->isa_vectmask() &&
18222 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18223 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18225 match(Set dst (VectorReinterpret src));
18226 effect(TEMP xtmp);
18227 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18228 ins_encode %{
18229 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18230 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18231 assert(src_sz == dst_sz , "src and dst size mismatch");
18232 int vlen_enc = vector_length_encoding(src_sz);
18233 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18234 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18235 %}
18236 ins_pipe( pipe_slow );
18237 %}
18238
18239 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18240 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18241 n->bottom_type()->isa_vectmask() &&
18242 n->in(1)->bottom_type()->isa_vectmask() &&
18243 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18244 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18246 match(Set dst (VectorReinterpret src));
18247 effect(TEMP xtmp);
18248 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18249 ins_encode %{
18250 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18251 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18252 assert(src_sz == dst_sz , "src and dst size mismatch");
18253 int vlen_enc = vector_length_encoding(src_sz);
18254 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18255 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18256 %}
18257 ins_pipe( pipe_slow );
18258 %}
18259
18260 instruct reinterpret(vec dst) %{
18261 predicate(!n->bottom_type()->isa_vectmask() &&
18262 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18263 match(Set dst (VectorReinterpret dst));
18264 ins_cost(125);
18265 format %{ "vector_reinterpret $dst\t!" %}
18266 ins_encode %{
18267 // empty
18268 %}
18269 ins_pipe( pipe_slow );
18270 %}
18271
18272 instruct reinterpret_expand(vec dst, vec src) %{
18273 predicate(UseAVX == 0 &&
18274 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18275 match(Set dst (VectorReinterpret src));
18276 ins_cost(125);
18277 effect(TEMP dst);
18278 format %{ "vector_reinterpret_expand $dst,$src" %}
18279 ins_encode %{
18280 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18281 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18282
18283 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18284 if (src_vlen_in_bytes == 4) {
18285 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18286 } else {
18287 assert(src_vlen_in_bytes == 8, "");
18288 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18289 }
18290 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18291 %}
18292 ins_pipe( pipe_slow );
18293 %}
18294
18295 instruct vreinterpret_expand4(legVec dst, vec src) %{
18296 predicate(UseAVX > 0 &&
18297 !n->bottom_type()->isa_vectmask() &&
18298 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18299 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18300 match(Set dst (VectorReinterpret src));
18301 ins_cost(125);
18302 format %{ "vector_reinterpret_expand $dst,$src" %}
18303 ins_encode %{
18304 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18305 %}
18306 ins_pipe( pipe_slow );
18307 %}
18308
18309
18310 instruct vreinterpret_expand(legVec dst, vec src) %{
18311 predicate(UseAVX > 0 &&
18312 !n->bottom_type()->isa_vectmask() &&
18313 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18314 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18315 match(Set dst (VectorReinterpret src));
18316 ins_cost(125);
18317 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18318 ins_encode %{
18319 switch (Matcher::vector_length_in_bytes(this, $src)) {
18320 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18321 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18322 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18323 default: ShouldNotReachHere();
18324 }
18325 %}
18326 ins_pipe( pipe_slow );
18327 %}
18328
18329 instruct reinterpret_shrink(vec dst, legVec src) %{
18330 predicate(!n->bottom_type()->isa_vectmask() &&
18331 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18332 match(Set dst (VectorReinterpret src));
18333 ins_cost(125);
18334 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18335 ins_encode %{
18336 switch (Matcher::vector_length_in_bytes(this)) {
18337 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18338 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18339 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18340 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18341 default: ShouldNotReachHere();
18342 }
18343 %}
18344 ins_pipe( pipe_slow );
18345 %}
18346
18347 // ----------------------------------------------------------------------------------------------------
18348
18349 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18350 match(Set dst (RoundDoubleMode src rmode));
18351 format %{ "roundsd $dst,$src" %}
18352 ins_cost(150);
18353 ins_encode %{
18354 assert(UseSSE >= 4, "required");
18355 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18356 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18357 }
18358 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18359 %}
18360 ins_pipe(pipe_slow);
18361 %}
18362
18363 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18364 match(Set dst (RoundDoubleMode con rmode));
18365 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18366 ins_cost(150);
18367 ins_encode %{
18368 assert(UseSSE >= 4, "required");
18369 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18370 %}
18371 ins_pipe(pipe_slow);
18372 %}
18373
18374 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18375 predicate(Matcher::vector_length(n) < 8);
18376 match(Set dst (RoundDoubleModeV src rmode));
18377 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18378 ins_encode %{
18379 assert(UseAVX > 0, "required");
18380 int vlen_enc = vector_length_encoding(this);
18381 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18382 %}
18383 ins_pipe( pipe_slow );
18384 %}
18385
18386 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18387 predicate(Matcher::vector_length(n) == 8);
18388 match(Set dst (RoundDoubleModeV src rmode));
18389 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18390 ins_encode %{
18391 assert(UseAVX > 2, "required");
18392 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18393 %}
18394 ins_pipe( pipe_slow );
18395 %}
18396
18397 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18398 predicate(Matcher::vector_length(n) < 8);
18399 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18400 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18401 ins_encode %{
18402 assert(UseAVX > 0, "required");
18403 int vlen_enc = vector_length_encoding(this);
18404 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18405 %}
18406 ins_pipe( pipe_slow );
18407 %}
18408
18409 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18410 predicate(Matcher::vector_length(n) == 8);
18411 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18412 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18413 ins_encode %{
18414 assert(UseAVX > 2, "required");
18415 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18416 %}
18417 ins_pipe( pipe_slow );
18418 %}
18419
18420 instruct onspinwait() %{
18421 match(OnSpinWait);
18422 ins_cost(200);
18423
18424 format %{
18425 $$template
18426 $$emit$$"pause\t! membar_onspinwait"
18427 %}
18428 ins_encode %{
18429 __ pause();
18430 %}
18431 ins_pipe(pipe_slow);
18432 %}
18433
18434 // a * b + c
18435 instruct fmaD_reg(regD a, regD b, regD c) %{
18436 match(Set c (FmaD c (Binary a b)));
18437 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18438 ins_cost(150);
18439 ins_encode %{
18440 assert(UseFMA, "Needs FMA instructions support.");
18441 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18442 %}
18443 ins_pipe( pipe_slow );
18444 %}
18445
18446 // a * b + c
18447 instruct fmaF_reg(regF a, regF b, regF c) %{
18448 match(Set c (FmaF c (Binary a b)));
18449 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18450 ins_cost(150);
18451 ins_encode %{
18452 assert(UseFMA, "Needs FMA instructions support.");
18453 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18454 %}
18455 ins_pipe( pipe_slow );
18456 %}
18457
18458 // ====================VECTOR INSTRUCTIONS=====================================
18459
18460 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18461 instruct MoveVec2Leg(legVec dst, vec src) %{
18462 match(Set dst src);
18463 format %{ "" %}
18464 ins_encode %{
18465 ShouldNotReachHere();
18466 %}
18467 ins_pipe( fpu_reg_reg );
18468 %}
18469
18470 instruct MoveLeg2Vec(vec dst, legVec src) %{
18471 match(Set dst src);
18472 format %{ "" %}
18473 ins_encode %{
18474 ShouldNotReachHere();
18475 %}
18476 ins_pipe( fpu_reg_reg );
18477 %}
18478
18479 // ============================================================================
18480
18481 // Load vectors generic operand pattern
18482 instruct loadV(vec dst, memory mem) %{
18483 match(Set dst (LoadVector mem));
18484 ins_cost(125);
18485 format %{ "load_vector $dst,$mem" %}
18486 ins_encode %{
18487 BasicType bt = Matcher::vector_element_basic_type(this);
18488 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18489 %}
18490 ins_pipe( pipe_slow );
18491 %}
18492
18493 // Store vectors generic operand pattern.
18494 instruct storeV(memory mem, vec src) %{
18495 match(Set mem (StoreVector mem src));
18496 ins_cost(145);
18497 format %{ "store_vector $mem,$src\n\t" %}
18498 ins_encode %{
18499 switch (Matcher::vector_length_in_bytes(this, $src)) {
18500 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18501 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18502 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18503 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18504 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18505 default: ShouldNotReachHere();
18506 }
18507 %}
18508 ins_pipe( pipe_slow );
18509 %}
18510
18511 // ---------------------------------------- Gather ------------------------------------
18512
18513 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
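//
// Semantically, a gather is the scalar loop below (sketch, for lane type T):
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = base[idx[i]];    // masked variants skip inactive lanes
//   }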
18514
18515 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18516 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18517 Matcher::vector_length_in_bytes(n) <= 32);
18518 match(Set dst (LoadVectorGather mem idx));
18519 effect(TEMP dst, TEMP tmp, TEMP mask);
18520 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18521 ins_encode %{
18522 int vlen_enc = vector_length_encoding(this);
18523 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18524 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18525 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18526 __ lea($tmp$$Register, $mem$$Address);
18527 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18528 %}
18529 ins_pipe( pipe_slow );
18530 %}
18531
18532
18533 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18534 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18535 !is_subword_type(Matcher::vector_element_basic_type(n)));
18536 match(Set dst (LoadVectorGather mem idx));
18537 effect(TEMP dst, TEMP tmp, TEMP ktmp);
18538 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
18539 ins_encode %{
18540 int vlen_enc = vector_length_encoding(this);
18541 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18542 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18543 __ lea($tmp$$Register, $mem$$Address);
18544 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18545 %}
18546 ins_pipe( pipe_slow );
18547 %}
18548
18549 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18550 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18551 !is_subword_type(Matcher::vector_element_basic_type(n)));
18552 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18553 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
18554 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
18555 ins_encode %{
18556 assert(UseAVX > 2, "sanity");
18557 int vlen_enc = vector_length_encoding(this);
18558 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18559 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is first copied to a temporary.
    // The destination is cleared so that masked-off lanes read as zero.
18562 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18563 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18564 __ lea($tmp$$Register, $mem$$Address);
18565 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18566 %}
18567 ins_pipe( pipe_slow );
18568 %}
18569
18570 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18571 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18572 match(Set dst (LoadVectorGather mem idx_base));
18573 effect(TEMP tmp, TEMP rtmp);
18574 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18575 ins_encode %{
18576 int vlen_enc = vector_length_encoding(this);
18577 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18578 __ lea($tmp$$Register, $mem$$Address);
18579 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18580 %}
18581 ins_pipe( pipe_slow );
18582 %}
18583
18584 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18585 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18586 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18587 match(Set dst (LoadVectorGather mem idx_base));
18588 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18589 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18590 ins_encode %{
18591 int vlen_enc = vector_length_encoding(this);
18592 int vector_len = Matcher::vector_length(this);
18593 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18594 __ lea($tmp$$Register, $mem$$Address);
18595 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18596 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18597 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18598 %}
18599 ins_pipe( pipe_slow );
18600 %}
18601
18602 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18603 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18604 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18605 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18606 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18607 ins_encode %{
18608 int vlen_enc = vector_length_encoding(this);
18609 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18610 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18611 __ lea($tmp$$Register, $mem$$Address);
18612 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18613 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18614 %}
18615 ins_pipe( pipe_slow );
18616 %}
18617
18618 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18619 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18620 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18621 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18622 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18623 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18624 ins_encode %{
18625 int vlen_enc = vector_length_encoding(this);
18626 int vector_len = Matcher::vector_length(this);
18627 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18628 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18629 __ lea($tmp$$Register, $mem$$Address);
18630 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18631 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18632 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18633 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18634 %}
18635 ins_pipe( pipe_slow );
18636 %}
18637
18638 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18639 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18640 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18641 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18642 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18643 ins_encode %{
18644 int vlen_enc = vector_length_encoding(this);
18645 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18646 __ lea($tmp$$Register, $mem$$Address);
18647 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18648 if (elem_bt == T_SHORT) {
18649 __ movl($mask_idx$$Register, 0x55555555);
18650 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18651 }
18652 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18653 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18654 %}
18655 ins_pipe( pipe_slow );
18656 %}
18657
18658 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18659 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18660 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18661 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18662 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18663 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18664 ins_encode %{
18665 int vlen_enc = vector_length_encoding(this);
18666 int vector_len = Matcher::vector_length(this);
18667 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18668 __ lea($tmp$$Register, $mem$$Address);
18669 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18670 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18671 if (elem_bt == T_SHORT) {
18672 __ movl($mask_idx$$Register, 0x55555555);
18673 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18674 }
18675 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18676 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18677 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18678 %}
18679 ins_pipe( pipe_slow );
18680 %}
18681
18682 // ====================Scatter=======================================
18683
18684 // Scatter INT, LONG, FLOAT, DOUBLE
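//
// Conceptually, a scatter stores each lane of src to its per-lane index:
//   for (int i = 0; i < vlen; i++) mem[idx[i]] = src[i];
// Illustrative (not part of this file) Java Vector API code that lowers to
// StoreVectorScatter / StoreVectorScatterMasked:
//   v.intoArray(a, 0, indexMap, 0);        // scatter
//   v.intoArray(a, 0, indexMap, 0, mask);  // masked scatter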
18685
18686 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18687 predicate(UseAVX > 2);
18688 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18689 effect(TEMP tmp, TEMP ktmp);
18690 format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
18691 ins_encode %{
18692 int vlen_enc = vector_length_encoding(this, $src);
18693 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18694
18695 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18696 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18697
18698 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18699 __ lea($tmp$$Register, $mem$$Address);
18700 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18701 %}
18702 ins_pipe( pipe_slow );
18703 %}
18704
18705 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18706 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18707 effect(TEMP tmp, TEMP ktmp);
18708 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18709 ins_encode %{
18710 int vlen_enc = vector_length_encoding(this, $src);
18711 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18712 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18713 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
// Note: The scatter instruction partially updates the opmask register used
// for predication, hence the mask operand is copied to a temporary.
18716 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18717 __ lea($tmp$$Register, $mem$$Address);
18718 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18719 %}
18720 ins_pipe( pipe_slow );
18721 %}
18722
18723 // ====================REPLICATE=======================================
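//
// Replicate broadcasts a scalar (register, memory, or immediate) into every
// lane of the destination: dst[i] = src for all i. Auto-vectorization and the
// Vector API broadcast operation (e.g. IntVector.broadcast) both produce it.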
18724
18725 // Replicate byte scalar to be vector
18726 instruct vReplB_reg(vec dst, rRegI src) %{
18727 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18728 match(Set dst (Replicate src));
18729 format %{ "replicateB $dst,$src" %}
18730 ins_encode %{
18731 uint vlen = Matcher::vector_length(this);
18732 if (UseAVX >= 2) {
18733 int vlen_enc = vector_length_encoding(this);
18734 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18735 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18736 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18737 } else {
18738 __ movdl($dst$$XMMRegister, $src$$Register);
18739 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18740 }
18741 } else {
18742 assert(UseAVX < 2, "");
18743 __ movdl($dst$$XMMRegister, $src$$Register);
18744 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18745 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18746 if (vlen >= 16) {
18747 assert(vlen == 16, "");
18748 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18749 }
18750 }
18751 %}
18752 ins_pipe( pipe_slow );
18753 %}
18754
18755 instruct ReplB_mem(vec dst, memory mem) %{
18756 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18757 match(Set dst (Replicate (LoadB mem)));
18758 format %{ "replicateB $dst,$mem" %}
18759 ins_encode %{
18760 int vlen_enc = vector_length_encoding(this);
18761 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18762 %}
18763 ins_pipe( pipe_slow );
18764 %}
18765
18766 // ====================ReplicateS=======================================
18767
18768 instruct vReplS_reg(vec dst, rRegI src) %{
18769 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18770 match(Set dst (Replicate src));
18771 format %{ "replicateS $dst,$src" %}
18772 ins_encode %{
18773 uint vlen = Matcher::vector_length(this);
18774 int vlen_enc = vector_length_encoding(this);
18775 if (UseAVX >= 2) {
18776 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18777 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18778 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18779 } else {
18780 __ movdl($dst$$XMMRegister, $src$$Register);
18781 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18782 }
18783 } else {
18784 assert(UseAVX < 2, "");
18785 __ movdl($dst$$XMMRegister, $src$$Register);
18786 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18787 if (vlen >= 8) {
18788 assert(vlen == 8, "");
18789 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18790 }
18791 }
18792 %}
18793 ins_pipe( pipe_slow );
18794 %}
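
// Replicate a half-precision (FP16) scalar or immediate to be vector: the
// 16-bit payload is moved through a GPR (movl/vmovw) and broadcast with
// evpbroadcastw. Both rules below assume AVX512-FP16 support.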
18795
18796 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18797 match(Set dst (Replicate con));
18798 effect(TEMP rtmp);
18799 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18800 ins_encode %{
18801 int vlen_enc = vector_length_encoding(this);
18802 BasicType bt = Matcher::vector_element_basic_type(this);
18803 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18804 __ movl($rtmp$$Register, $con$$constant);
18805 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18806 %}
18807 ins_pipe( pipe_slow );
18808 %}
18809
18810 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18811 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18812 match(Set dst (Replicate src));
18813 effect(TEMP rtmp);
18814 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18815 ins_encode %{
18816 int vlen_enc = vector_length_encoding(this);
18817 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18818 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18819 %}
18820 ins_pipe( pipe_slow );
18821 %}
18822
18823 instruct ReplS_mem(vec dst, memory mem) %{
18824 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18825 match(Set dst (Replicate (LoadS mem)));
18826 format %{ "replicateS $dst,$mem" %}
18827 ins_encode %{
18828 int vlen_enc = vector_length_encoding(this);
18829 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18830 %}
18831 ins_pipe( pipe_slow );
18832 %}
18833
18834 // ====================ReplicateI=======================================
18835
18836 instruct ReplI_reg(vec dst, rRegI src) %{
18837 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18838 match(Set dst (Replicate src));
18839 format %{ "replicateI $dst,$src" %}
18840 ins_encode %{
18841 uint vlen = Matcher::vector_length(this);
18842 int vlen_enc = vector_length_encoding(this);
18843 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18844 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18845 } else if (VM_Version::supports_avx2()) {
18846 __ movdl($dst$$XMMRegister, $src$$Register);
18847 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18848 } else {
18849 __ movdl($dst$$XMMRegister, $src$$Register);
18850 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18851 }
18852 %}
18853 ins_pipe( pipe_slow );
18854 %}
18855
18856 instruct ReplI_mem(vec dst, memory mem) %{
18857 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18858 match(Set dst (Replicate (LoadI mem)));
18859 format %{ "replicateI $dst,$mem" %}
18860 ins_encode %{
18861 int vlen_enc = vector_length_encoding(this);
18862 if (VM_Version::supports_avx2()) {
18863 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18864 } else if (VM_Version::supports_avx()) {
18865 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18866 } else {
18867 __ movdl($dst$$XMMRegister, $mem$$Address);
18868 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18869 }
18870 %}
18871 ins_pipe( pipe_slow );
18872 %}
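
// Replicate integer scalar immediate to be vector by loading from the const table.
// vreplicate_imm pre-replicates the immediate only as widely as the cheapest
// available broadcast load needs: 4 bytes with AVX (vbroadcastss), 8 bytes with
// SSE3 (movddup), otherwise a full 16 bytes.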
18873
18874 instruct ReplI_imm(vec dst, immI con) %{
18875 predicate(Matcher::is_non_long_integral_vector(n));
18876 match(Set dst (Replicate con));
18877 format %{ "replicateI $dst,$con" %}
18878 ins_encode %{
18879 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18880 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18881 type2aelembytes(Matcher::vector_element_basic_type(this))));
18882 BasicType bt = Matcher::vector_element_basic_type(this);
18883 int vlen = Matcher::vector_length_in_bytes(this);
18884 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18885 %}
18886 ins_pipe( pipe_slow );
18887 %}
18888
18889 // Replicate scalar zero to be vector
18890 instruct ReplI_zero(vec dst, immI_0 zero) %{
18891 predicate(Matcher::is_non_long_integral_vector(n));
18892 match(Set dst (Replicate zero));
18893 format %{ "replicateI $dst,$zero" %}
18894 ins_encode %{
18895 int vlen_enc = vector_length_encoding(this);
18896 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18897 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18898 } else {
18899 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18900 }
18901 %}
18902 ins_pipe( fpu_reg_reg );
18903 %}
18904
18905 instruct ReplI_M1(vec dst, immI_M1 con) %{
18906 predicate(Matcher::is_non_long_integral_vector(n));
18907 match(Set dst (Replicate con));
18908 format %{ "vallones $dst" %}
18909 ins_encode %{
18910 int vector_len = vector_length_encoding(this);
18911 __ vallones($dst$$XMMRegister, vector_len);
18912 %}
18913 ins_pipe( pipe_slow );
18914 %}
18915
18916 // ====================ReplicateL=======================================
18917
18918 // Replicate long (8 byte) scalar to be vector
18919 instruct ReplL_reg(vec dst, rRegL src) %{
18920 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18921 match(Set dst (Replicate src));
18922 format %{ "replicateL $dst,$src" %}
18923 ins_encode %{
18924 int vlen = Matcher::vector_length(this);
18925 int vlen_enc = vector_length_encoding(this);
18926 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18927 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18928 } else if (VM_Version::supports_avx2()) {
18929 __ movdq($dst$$XMMRegister, $src$$Register);
18930 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18931 } else {
18932 __ movdq($dst$$XMMRegister, $src$$Register);
18933 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18934 }
18935 %}
18936 ins_pipe( pipe_slow );
18937 %}
18938
18939 instruct ReplL_mem(vec dst, memory mem) %{
18940 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18941 match(Set dst (Replicate (LoadL mem)));
18942 format %{ "replicateL $dst,$mem" %}
18943 ins_encode %{
18944 int vlen_enc = vector_length_encoding(this);
18945 if (VM_Version::supports_avx2()) {
18946 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18947 } else if (VM_Version::supports_sse3()) {
18948 __ movddup($dst$$XMMRegister, $mem$$Address);
18949 } else {
18950 __ movq($dst$$XMMRegister, $mem$$Address);
18951 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18952 }
18953 %}
18954 ins_pipe( pipe_slow );
18955 %}
18956
18957 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
18958 instruct ReplL_imm(vec dst, immL con) %{
18959 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18960 match(Set dst (Replicate con));
18961 format %{ "replicateL $dst,$con" %}
18962 ins_encode %{
18963 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18964 int vlen = Matcher::vector_length_in_bytes(this);
18965 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18966 %}
18967 ins_pipe( pipe_slow );
18968 %}
18969
18970 instruct ReplL_zero(vec dst, immL0 zero) %{
18971 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18972 match(Set dst (Replicate zero));
18973 format %{ "replicateL $dst,$zero" %}
18974 ins_encode %{
18975 int vlen_enc = vector_length_encoding(this);
18976 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18977 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18978 } else {
18979 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18980 }
18981 %}
18982 ins_pipe( fpu_reg_reg );
18983 %}
18984
18985 instruct ReplL_M1(vec dst, immL_M1 con) %{
18986 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18987 match(Set dst (Replicate con));
18988 format %{ "vallones $dst" %}
18989 ins_encode %{
18990 int vector_len = vector_length_encoding(this);
18991 __ vallones($dst$$XMMRegister, vector_len);
18992 %}
18993 ins_pipe( pipe_slow );
18994 %}
18995
18996 // ====================ReplicateF=======================================
18997
18998 instruct vReplF_reg(vec dst, vlRegF src) %{
18999 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19000 match(Set dst (Replicate src));
19001 format %{ "replicateF $dst,$src" %}
19002 ins_encode %{
19003 uint vlen = Matcher::vector_length(this);
19004 int vlen_enc = vector_length_encoding(this);
19005 if (vlen <= 4) {
19006 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19007 } else if (VM_Version::supports_avx2()) {
19008 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19009 } else {
19010 assert(vlen == 8, "sanity");
19011 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19012 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19013 }
19014 %}
19015 ins_pipe( pipe_slow );
19016 %}
19017
19018 instruct ReplF_reg(vec dst, vlRegF src) %{
19019 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19020 match(Set dst (Replicate src));
19021 format %{ "replicateF $dst,$src" %}
19022 ins_encode %{
19023 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19024 %}
19025 ins_pipe( pipe_slow );
19026 %}
19027
19028 instruct ReplF_mem(vec dst, memory mem) %{
19029 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19030 match(Set dst (Replicate (LoadF mem)));
19031 format %{ "replicateF $dst,$mem" %}
19032 ins_encode %{
19033 int vlen_enc = vector_length_encoding(this);
19034 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19035 %}
19036 ins_pipe( pipe_slow );
19037 %}
19038
19039 // Replicate float scalar immediate to be vector by loading from const table.
19040 instruct ReplF_imm(vec dst, immF con) %{
19041 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19042 match(Set dst (Replicate con));
19043 format %{ "replicateF $dst,$con" %}
19044 ins_encode %{
19045 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19046 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19047 int vlen = Matcher::vector_length_in_bytes(this);
19048 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19049 %}
19050 ins_pipe( pipe_slow );
19051 %}
19052
19053 instruct ReplF_zero(vec dst, immF0 zero) %{
19054 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19055 match(Set dst (Replicate zero));
19056 format %{ "replicateF $dst,$zero" %}
19057 ins_encode %{
19058 int vlen_enc = vector_length_encoding(this);
19059 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19060 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19061 } else {
19062 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19063 }
19064 %}
19065 ins_pipe( fpu_reg_reg );
19066 %}
19067
19068 // ====================ReplicateD=======================================
19069
19070 // Replicate double (8 bytes) scalar to be vector
19071 instruct vReplD_reg(vec dst, vlRegD src) %{
19072 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19073 match(Set dst (Replicate src));
19074 format %{ "replicateD $dst,$src" %}
19075 ins_encode %{
19076 uint vlen = Matcher::vector_length(this);
19077 int vlen_enc = vector_length_encoding(this);
19078 if (vlen <= 2) {
19079 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19080 } else if (VM_Version::supports_avx2()) {
19081 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19082 } else {
19083 assert(vlen == 4, "sanity");
19084 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19085 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19086 }
19087 %}
19088 ins_pipe( pipe_slow );
19089 %}
19090
19091 instruct ReplD_reg(vec dst, vlRegD src) %{
19092 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19093 match(Set dst (Replicate src));
19094 format %{ "replicateD $dst,$src" %}
19095 ins_encode %{
19096 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19097 %}
19098 ins_pipe( pipe_slow );
19099 %}
19100
19101 instruct ReplD_mem(vec dst, memory mem) %{
19102 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19103 match(Set dst (Replicate (LoadD mem)));
19104 format %{ "replicateD $dst,$mem" %}
19105 ins_encode %{
19106 if (Matcher::vector_length(this) >= 4) {
19107 int vlen_enc = vector_length_encoding(this);
19108 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19109 } else {
19110 __ movddup($dst$$XMMRegister, $mem$$Address);
19111 }
19112 %}
19113 ins_pipe( pipe_slow );
19114 %}
19115
19116 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
19117 instruct ReplD_imm(vec dst, immD con) %{
19118 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19119 match(Set dst (Replicate con));
19120 format %{ "replicateD $dst,$con" %}
19121 ins_encode %{
19122 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19123 int vlen = Matcher::vector_length_in_bytes(this);
19124 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19125 %}
19126 ins_pipe( pipe_slow );
19127 %}
19128
19129 instruct ReplD_zero(vec dst, immD0 zero) %{
19130 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19131 match(Set dst (Replicate zero));
19132 format %{ "replicateD $dst,$zero" %}
19133 ins_encode %{
19134 int vlen_enc = vector_length_encoding(this);
19135 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19136 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19137 } else {
19138 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19139 }
19140 %}
19141 ins_pipe( fpu_reg_reg );
19142 %}
19143
19144 // ====================VECTOR INSERT=======================================
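//
// For vectors wider than 128 bits, an element is inserted by extracting the
// 128-bit lane that holds it, inserting within that lane, and writing the lane
// back. The constant index is split as
//   x_idx = idx & (elem_per_lane - 1);   // element within the lane
//   y_idx = idx >> log2(elem_per_lane);  // which 128-bit lane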
19145
19146 instruct insert(vec dst, rRegI val, immU8 idx) %{
19147 predicate(Matcher::vector_length_in_bytes(n) < 32);
19148 match(Set dst (VectorInsert (Binary dst val) idx));
19149 format %{ "vector_insert $dst,$val,$idx" %}
19150 ins_encode %{
19151 assert(UseSSE >= 4, "required");
19152 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19153
19154 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19155
19156 assert(is_integral_type(elem_bt), "");
19157 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19158
19159 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19160 %}
19161 ins_pipe( pipe_slow );
19162 %}
19163
19164 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19165 predicate(Matcher::vector_length_in_bytes(n) == 32);
19166 match(Set dst (VectorInsert (Binary src val) idx));
19167 effect(TEMP vtmp);
19168 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19169 ins_encode %{
19170 int vlen_enc = Assembler::AVX_256bit;
19171 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19172 int elem_per_lane = 16/type2aelembytes(elem_bt);
19173 int log2epr = log2(elem_per_lane);
19174
19175 assert(is_integral_type(elem_bt), "sanity");
19176 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19177
19178 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19179 uint y_idx = ($idx$$constant >> log2epr) & 1;
19180 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19181 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19182 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19183 %}
19184 ins_pipe( pipe_slow );
19185 %}
19186
19187 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19188 predicate(Matcher::vector_length_in_bytes(n) == 64);
19189 match(Set dst (VectorInsert (Binary src val) idx));
19190 effect(TEMP vtmp);
19191 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19192 ins_encode %{
19193 assert(UseAVX > 2, "sanity");
19194
19195 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19196 int elem_per_lane = 16/type2aelembytes(elem_bt);
19197 int log2epr = log2(elem_per_lane);
19198
19199 assert(is_integral_type(elem_bt), "");
19200 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19201
19202 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19203 uint y_idx = ($idx$$constant >> log2epr) & 3;
19204 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19205 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19206 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19207 %}
19208 ins_pipe( pipe_slow );
19209 %}
19210
19211 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19212 predicate(Matcher::vector_length(n) == 2);
19213 match(Set dst (VectorInsert (Binary dst val) idx));
19214 format %{ "vector_insert $dst,$val,$idx" %}
19215 ins_encode %{
19216 assert(UseSSE >= 4, "required");
19217 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19218 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19219
19220 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19221 %}
19222 ins_pipe( pipe_slow );
19223 %}
19224
19225 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19226 predicate(Matcher::vector_length(n) == 4);
19227 match(Set dst (VectorInsert (Binary src val) idx));
19228 effect(TEMP vtmp);
19229 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19230 ins_encode %{
19231 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19232 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19233
19234 uint x_idx = $idx$$constant & right_n_bits(1);
19235 uint y_idx = ($idx$$constant >> 1) & 1;
19236 int vlen_enc = Assembler::AVX_256bit;
19237 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19238 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19239 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19240 %}
19241 ins_pipe( pipe_slow );
19242 %}
19243
19244 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19245 predicate(Matcher::vector_length(n) == 8);
19246 match(Set dst (VectorInsert (Binary src val) idx));
19247 effect(TEMP vtmp);
19248 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19249 ins_encode %{
19250 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19251 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19252
19253 uint x_idx = $idx$$constant & right_n_bits(1);
19254 uint y_idx = ($idx$$constant >> 1) & 3;
19255 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19256 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19257 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19258 %}
19259 ins_pipe( pipe_slow );
19260 %}
19261
19262 instruct insertF(vec dst, regF val, immU8 idx) %{
19263 predicate(Matcher::vector_length(n) < 8);
19264 match(Set dst (VectorInsert (Binary dst val) idx));
19265 format %{ "vector_insert $dst,$val,$idx" %}
19266 ins_encode %{
19267 assert(UseSSE >= 4, "sanity");
19268
19269 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19270 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19271
19272 uint x_idx = $idx$$constant & right_n_bits(2);
19273 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19274 %}
19275 ins_pipe( pipe_slow );
19276 %}
19277
19278 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19279 predicate(Matcher::vector_length(n) >= 8);
19280 match(Set dst (VectorInsert (Binary src val) idx));
19281 effect(TEMP vtmp);
19282 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19283 ins_encode %{
19284 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19285 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19286
19287 int vlen = Matcher::vector_length(this);
19288 uint x_idx = $idx$$constant & right_n_bits(2);
19289 if (vlen == 8) {
19290 uint y_idx = ($idx$$constant >> 2) & 1;
19291 int vlen_enc = Assembler::AVX_256bit;
19292 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19293 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19294 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19295 } else {
19296 assert(vlen == 16, "sanity");
19297 uint y_idx = ($idx$$constant >> 2) & 3;
19298 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19299 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19300 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19301 }
19302 %}
19303 ins_pipe( pipe_slow );
19304 %}
19305
19306 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19307 predicate(Matcher::vector_length(n) == 2);
19308 match(Set dst (VectorInsert (Binary dst val) idx));
19309 effect(TEMP tmp);
19310 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19311 ins_encode %{
19312 assert(UseSSE >= 4, "sanity");
19313 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19314 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19315
19316 __ movq($tmp$$Register, $val$$XMMRegister);
19317 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19318 %}
19319 ins_pipe( pipe_slow );
19320 %}
19321
19322 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19323 predicate(Matcher::vector_length(n) == 4);
19324 match(Set dst (VectorInsert (Binary src val) idx));
19325 effect(TEMP vtmp, TEMP tmp);
19326 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19327 ins_encode %{
19328 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19329 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19330
19331 uint x_idx = $idx$$constant & right_n_bits(1);
19332 uint y_idx = ($idx$$constant >> 1) & 1;
19333 int vlen_enc = Assembler::AVX_256bit;
19334 __ movq($tmp$$Register, $val$$XMMRegister);
19335 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19336 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19337 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19338 %}
19339 ins_pipe( pipe_slow );
19340 %}
19341
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
19343 predicate(Matcher::vector_length(n) == 8);
19344 match(Set dst (VectorInsert (Binary src val) idx));
19345 effect(TEMP tmp, TEMP vtmp);
19346 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19347 ins_encode %{
19348 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19349 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19350
19351 uint x_idx = $idx$$constant & right_n_bits(1);
19352 uint y_idx = ($idx$$constant >> 1) & 3;
19353 __ movq($tmp$$Register, $val$$XMMRegister);
19354 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19355 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19356 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19357 %}
19358 ins_pipe( pipe_slow );
19359 %}
19360
19361 // ====================REDUCTION ARITHMETIC=======================================
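//
// A reduction folds every lane of the vector input into the scalar input:
//   dst = src1 OP src2[0] OP src2[1] OP ... OP src2[vlen-1]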
19362
19363 // =======================Int Reduction==========================================
19364
19365 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19366 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19367 match(Set dst (AddReductionVI src1 src2));
19368 match(Set dst (MulReductionVI src1 src2));
19369 match(Set dst (AndReductionV src1 src2));
19370 match(Set dst ( OrReductionV src1 src2));
19371 match(Set dst (XorReductionV src1 src2));
19372 match(Set dst (MinReductionV src1 src2));
19373 match(Set dst (MaxReductionV src1 src2));
19374 match(Set dst (UMinReductionV src1 src2));
19375 match(Set dst (UMaxReductionV src1 src2));
19376 effect(TEMP vtmp1, TEMP vtmp2);
19377 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19378 ins_encode %{
19379 int opcode = this->ideal_Opcode();
19380 int vlen = Matcher::vector_length(this, $src2);
19381 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19382 %}
19383 ins_pipe( pipe_slow );
19384 %}
19385
19386 // =======================Long Reduction==========================================
19387
19388 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19389 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19390 match(Set dst (AddReductionVL src1 src2));
19391 match(Set dst (MulReductionVL src1 src2));
19392 match(Set dst (AndReductionV src1 src2));
19393 match(Set dst ( OrReductionV src1 src2));
19394 match(Set dst (XorReductionV src1 src2));
19395 match(Set dst (MinReductionV src1 src2));
19396 match(Set dst (MaxReductionV src1 src2));
19397 match(Set dst (UMinReductionV src1 src2));
19398 match(Set dst (UMaxReductionV src1 src2));
19399 effect(TEMP vtmp1, TEMP vtmp2);
19400 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19401 ins_encode %{
19402 int opcode = this->ideal_Opcode();
19403 int vlen = Matcher::vector_length(this, $src2);
19404 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19405 %}
19406 ins_pipe( pipe_slow );
19407 %}
19408
19409 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19410 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19411 match(Set dst (AddReductionVL src1 src2));
19412 match(Set dst (MulReductionVL src1 src2));
19413 match(Set dst (AndReductionV src1 src2));
19414 match(Set dst ( OrReductionV src1 src2));
19415 match(Set dst (XorReductionV src1 src2));
19416 match(Set dst (MinReductionV src1 src2));
19417 match(Set dst (MaxReductionV src1 src2));
19418 match(Set dst (UMinReductionV src1 src2));
19419 match(Set dst (UMaxReductionV src1 src2));
19420 effect(TEMP vtmp1, TEMP vtmp2);
19421 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19422 ins_encode %{
19423 int opcode = this->ideal_Opcode();
19424 int vlen = Matcher::vector_length(this, $src2);
19425 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19426 %}
19427 ins_pipe( pipe_slow );
19428 %}
19429
19430 // =======================Float Reduction==========================================
19431
19432 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19433 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19434 match(Set dst (AddReductionVF dst src));
19435 match(Set dst (MulReductionVF dst src));
19436 effect(TEMP dst, TEMP vtmp);
19437 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19438 ins_encode %{
19439 int opcode = this->ideal_Opcode();
19440 int vlen = Matcher::vector_length(this, $src);
19441 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19442 %}
19443 ins_pipe( pipe_slow );
19444 %}
19445
19446 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19447 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19448 match(Set dst (AddReductionVF dst src));
19449 match(Set dst (MulReductionVF dst src));
19450 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19451 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19452 ins_encode %{
19453 int opcode = this->ideal_Opcode();
19454 int vlen = Matcher::vector_length(this, $src);
19455 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19456 %}
19457 ins_pipe( pipe_slow );
19458 %}
19459
19460 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19461 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19462 match(Set dst (AddReductionVF dst src));
19463 match(Set dst (MulReductionVF dst src));
19464 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19465 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19466 ins_encode %{
19467 int opcode = this->ideal_Opcode();
19468 int vlen = Matcher::vector_length(this, $src);
19469 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19470 %}
19471 ins_pipe( pipe_slow );
19472 %}
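
// The strictly ordered rules above accumulate lanes in sequence, preserving
// Java's left-to-right floating-point semantics; the unordered rules below may
// combine lanes pairwise, which the Vector API permits for add/mul reductions.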
19473
19474
19475 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19476 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19477 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19478 // src1 contains reduction identity
19479 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19480 match(Set dst (AddReductionVF src1 src2));
19481 match(Set dst (MulReductionVF src1 src2));
19482 effect(TEMP dst);
19483 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19484 ins_encode %{
19485 int opcode = this->ideal_Opcode();
19486 int vlen = Matcher::vector_length(this, $src2);
19487 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19488 %}
19489 ins_pipe( pipe_slow );
19490 %}
19491
19492 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19493 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19494 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19495 // src1 contains reduction identity
19496 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19497 match(Set dst (AddReductionVF src1 src2));
19498 match(Set dst (MulReductionVF src1 src2));
19499 effect(TEMP dst, TEMP vtmp);
19500 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19501 ins_encode %{
19502 int opcode = this->ideal_Opcode();
19503 int vlen = Matcher::vector_length(this, $src2);
19504 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19505 %}
19506 ins_pipe( pipe_slow );
19507 %}
19508
19509 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19510 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19511 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19512 // src1 contains reduction identity
19513 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19514 match(Set dst (AddReductionVF src1 src2));
19515 match(Set dst (MulReductionVF src1 src2));
19516 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19517 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19518 ins_encode %{
19519 int opcode = this->ideal_Opcode();
19520 int vlen = Matcher::vector_length(this, $src2);
19521 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19522 %}
19523 ins_pipe( pipe_slow );
19524 %}
19525
19526 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19527 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19528 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19529 // src1 contains reduction identity
19530 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19531 match(Set dst (AddReductionVF src1 src2));
19532 match(Set dst (MulReductionVF src1 src2));
19533 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19534 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19535 ins_encode %{
19536 int opcode = this->ideal_Opcode();
19537 int vlen = Matcher::vector_length(this, $src2);
19538 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19539 %}
19540 ins_pipe( pipe_slow );
19541 %}
19542
19543 // =======================Double Reduction==========================================
19544
19545 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19546 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19547 match(Set dst (AddReductionVD dst src));
19548 match(Set dst (MulReductionVD dst src));
19549 effect(TEMP dst, TEMP vtmp);
19550 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19551 ins_encode %{
19552 int opcode = this->ideal_Opcode();
19553 int vlen = Matcher::vector_length(this, $src);
19554 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19555 %}
19556 ins_pipe( pipe_slow );
19557 %}
19558
19559 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19560 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19561 match(Set dst (AddReductionVD dst src));
19562 match(Set dst (MulReductionVD dst src));
19563 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19564 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19565 ins_encode %{
19566 int opcode = this->ideal_Opcode();
19567 int vlen = Matcher::vector_length(this, $src);
19568 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19569 %}
19570 ins_pipe( pipe_slow );
19571 %}
19572
19573 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19574 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19575 match(Set dst (AddReductionVD dst src));
19576 match(Set dst (MulReductionVD dst src));
19577 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19578 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19579 ins_encode %{
19580 int opcode = this->ideal_Opcode();
19581 int vlen = Matcher::vector_length(this, $src);
19582 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19583 %}
19584 ins_pipe( pipe_slow );
19585 %}
19586
19587 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19588 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19589 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19590 // src1 contains reduction identity
19591 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19592 match(Set dst (AddReductionVD src1 src2));
19593 match(Set dst (MulReductionVD src1 src2));
19594 effect(TEMP dst);
19595 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19596 ins_encode %{
19597 int opcode = this->ideal_Opcode();
19598 int vlen = Matcher::vector_length(this, $src2);
19599 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19600 %}
19601 ins_pipe( pipe_slow );
19602 %}
19603
19604 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19605 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19606 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19607 // src1 contains reduction identity
19608 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19609 match(Set dst (AddReductionVD src1 src2));
19610 match(Set dst (MulReductionVD src1 src2));
19611 effect(TEMP dst, TEMP vtmp);
19612 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19613 ins_encode %{
19614 int opcode = this->ideal_Opcode();
19615 int vlen = Matcher::vector_length(this, $src2);
19616 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19617 %}
19618 ins_pipe( pipe_slow );
19619 %}
19620
19621 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19622 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19623 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19624 // src1 contains reduction identity
19625 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19626 match(Set dst (AddReductionVD src1 src2));
19627 match(Set dst (MulReductionVD src1 src2));
19628 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19629 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19630 ins_encode %{
19631 int opcode = this->ideal_Opcode();
19632 int vlen = Matcher::vector_length(this, $src2);
19633 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19634 %}
19635 ins_pipe( pipe_slow );
19636 %}
19637
19638 // =======================Byte Reduction==========================================
19639
19640 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19641 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19642 match(Set dst (AddReductionVI src1 src2));
19643 match(Set dst (AndReductionV src1 src2));
19644 match(Set dst ( OrReductionV src1 src2));
19645 match(Set dst (XorReductionV src1 src2));
19646 match(Set dst (MinReductionV src1 src2));
19647 match(Set dst (MaxReductionV src1 src2));
19648 match(Set dst (UMinReductionV src1 src2));
19649 match(Set dst (UMaxReductionV src1 src2));
19650 effect(TEMP vtmp1, TEMP vtmp2);
19651 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19652 ins_encode %{
19653 int opcode = this->ideal_Opcode();
19654 int vlen = Matcher::vector_length(this, $src2);
19655 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19656 %}
19657 ins_pipe( pipe_slow );
19658 %}
19659
19660 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19661 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19662 match(Set dst (AddReductionVI src1 src2));
19663 match(Set dst (AndReductionV src1 src2));
19664 match(Set dst ( OrReductionV src1 src2));
19665 match(Set dst (XorReductionV src1 src2));
19666 match(Set dst (MinReductionV src1 src2));
19667 match(Set dst (MaxReductionV src1 src2));
19668 match(Set dst (UMinReductionV src1 src2));
19669 match(Set dst (UMaxReductionV src1 src2));
19670 effect(TEMP vtmp1, TEMP vtmp2);
19671 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19672 ins_encode %{
19673 int opcode = this->ideal_Opcode();
19674 int vlen = Matcher::vector_length(this, $src2);
19675 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19676 %}
19677 ins_pipe( pipe_slow );
19678 %}
19679
19680 // =======================Short Reduction==========================================
19681
19682 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19683 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19684 match(Set dst (AddReductionVI src1 src2));
19685 match(Set dst (MulReductionVI src1 src2));
19686 match(Set dst (AndReductionV src1 src2));
19687 match(Set dst ( OrReductionV src1 src2));
19688 match(Set dst (XorReductionV src1 src2));
19689 match(Set dst (MinReductionV src1 src2));
19690 match(Set dst (MaxReductionV src1 src2));
19691 match(Set dst (UMinReductionV src1 src2));
19692 match(Set dst (UMaxReductionV src1 src2));
19693 effect(TEMP vtmp1, TEMP vtmp2);
19694 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19695 ins_encode %{
19696 int opcode = this->ideal_Opcode();
19697 int vlen = Matcher::vector_length(this, $src2);
19698 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19699 %}
19700 ins_pipe( pipe_slow );
19701 %}
19702
19703 // =======================Mul Reduction==========================================
19704
19705 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19706 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19707 Matcher::vector_length(n->in(2)) <= 32); // src2
19708 match(Set dst (MulReductionVI src1 src2));
19709 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19710 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19711 ins_encode %{
19712 int opcode = this->ideal_Opcode();
19713 int vlen = Matcher::vector_length(this, $src2);
19714 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19715 %}
19716 ins_pipe( pipe_slow );
19717 %}
19718
19719 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19720 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19721 Matcher::vector_length(n->in(2)) == 64); // src2
19722 match(Set dst (MulReductionVI src1 src2));
19723 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19724 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19725 ins_encode %{
19726 int opcode = this->ideal_Opcode();
19727 int vlen = Matcher::vector_length(this, $src2);
19728 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19729 %}
19730 ins_pipe( pipe_slow );
19731 %}
19732
19733 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
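// Plain SSE/AVX min/max do not implement Java semantics for NaN and signed
// zero (Math.min/max return NaN if either input is NaN and treat -0.0 as less
// than +0.0), hence the extra blend temporaries and the killed flags register.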
19735 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19736 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19737 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19738 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19739 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19740 Matcher::vector_length(n->in(2)) == 2);
19741 match(Set dst (MinReductionV src1 src2));
19742 match(Set dst (MaxReductionV src1 src2));
19743 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19744 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19745 ins_encode %{
19746 assert(UseAVX > 0, "sanity");
19747
19748 int opcode = this->ideal_Opcode();
19749 int vlen = Matcher::vector_length(this, $src2);
19750 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19751 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19752 %}
19753 ins_pipe( pipe_slow );
19754 %}
19755
19756 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19757 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19758 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19759 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19760 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19761 Matcher::vector_length(n->in(2)) >= 4);
19762 match(Set dst (MinReductionV src1 src2));
19763 match(Set dst (MaxReductionV src1 src2));
19764 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19765 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19766 ins_encode %{
19767 assert(UseAVX > 0, "sanity");
19768
19769 int opcode = this->ideal_Opcode();
19770 int vlen = Matcher::vector_length(this, $src2);
19771 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19772 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19773 %}
19774 ins_pipe( pipe_slow );
19775 %}
19776
19777 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19778 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19779 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19780 Matcher::vector_length(n->in(2)) == 2);
19781 match(Set dst (MinReductionV dst src));
19782 match(Set dst (MaxReductionV dst src));
19783 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19784 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19785 ins_encode %{
19786 assert(UseAVX > 0, "sanity");
19787
19788 int opcode = this->ideal_Opcode();
19789 int vlen = Matcher::vector_length(this, $src);
19790 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19791 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19792 %}
19793 ins_pipe( pipe_slow );
19794 %}
19795
19796
19797 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19798 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19799 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19800 Matcher::vector_length(n->in(2)) >= 4);
19801 match(Set dst (MinReductionV dst src));
19802 match(Set dst (MaxReductionV dst src));
19803 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19804 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19805 ins_encode %{
19806 assert(UseAVX > 0, "sanity");
19807
19808 int opcode = this->ideal_Opcode();
19809 int vlen = Matcher::vector_length(this, $src);
19810 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19811 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19812 %}
19813 ins_pipe( pipe_slow );
19814 %}
19815
19816 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19817 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19818 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19819 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19820 Matcher::vector_length(n->in(2)) == 2);
19821 match(Set dst (MinReductionV src1 src2));
19822 match(Set dst (MaxReductionV src1 src2));
19823 effect(TEMP dst, TEMP xtmp1);
19824 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19825 ins_encode %{
19826 int opcode = this->ideal_Opcode();
19827 int vlen = Matcher::vector_length(this, $src2);
19828 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19829 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19830 %}
19831 ins_pipe( pipe_slow );
19832 %}
19833
19834 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19835 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19836 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19837 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19838 Matcher::vector_length(n->in(2)) >= 4);
19839 match(Set dst (MinReductionV src1 src2));
19840 match(Set dst (MaxReductionV src1 src2));
19841 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19842 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19843 ins_encode %{
19844 int opcode = this->ideal_Opcode();
19845 int vlen = Matcher::vector_length(this, $src2);
19846 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19847 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19848 %}
19849 ins_pipe( pipe_slow );
19850 %}
19851
19852 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19853 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19854 Matcher::vector_length(n->in(2)) == 2);
19855 match(Set dst (MinReductionV dst src));
19856 match(Set dst (MaxReductionV dst src));
19857 effect(TEMP dst, TEMP xtmp1);
19858 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19859 ins_encode %{
19860 int opcode = this->ideal_Opcode();
19861 int vlen = Matcher::vector_length(this, $src);
19862 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19863 $xtmp1$$XMMRegister);
19864 %}
19865 ins_pipe( pipe_slow );
19866 %}
19867
19868 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19869 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19870 Matcher::vector_length(n->in(2)) >= 4);
19871 match(Set dst (MinReductionV dst src));
19872 match(Set dst (MaxReductionV dst src));
19873 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19874 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19875 ins_encode %{
19876 int opcode = this->ideal_Opcode();
19877 int vlen = Matcher::vector_length(this, $src);
19878 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19879 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19880 %}
19881 ins_pipe( pipe_slow );
19882 %}
19883
//--------------------Min/Max Double Reduction --------------------
19885 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19886 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19887 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19888 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19889 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19890 Matcher::vector_length(n->in(2)) == 2);
19891 match(Set dst (MinReductionV src1 src2));
19892 match(Set dst (MaxReductionV src1 src2));
19893 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19894 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19895 ins_encode %{
19896 assert(UseAVX > 0, "sanity");
19897
19898 int opcode = this->ideal_Opcode();
19899 int vlen = Matcher::vector_length(this, $src2);
19900 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19901 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19902 %}
19903 ins_pipe( pipe_slow );
19904 %}
19905
19906 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19907 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19908 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19909 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19910 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19911 Matcher::vector_length(n->in(2)) >= 4);
19912 match(Set dst (MinReductionV src1 src2));
19913 match(Set dst (MaxReductionV src1 src2));
19914 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19915 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19916 ins_encode %{
19917 assert(UseAVX > 0, "sanity");
19918
19919 int opcode = this->ideal_Opcode();
19920 int vlen = Matcher::vector_length(this, $src2);
19921 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19922 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19923 %}
19924 ins_pipe( pipe_slow );
19925 %}
19926
19927
19928 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19929 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19930 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19931 Matcher::vector_length(n->in(2)) == 2);
19932 match(Set dst (MinReductionV dst src));
19933 match(Set dst (MaxReductionV dst src));
19934 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19935 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19936 ins_encode %{
19937 assert(UseAVX > 0, "sanity");
19938
19939 int opcode = this->ideal_Opcode();
19940 int vlen = Matcher::vector_length(this, $src);
19941 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19942 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19943 %}
19944 ins_pipe( pipe_slow );
19945 %}
19946
19947 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19948 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19949 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19950 Matcher::vector_length(n->in(2)) >= 4);
19951 match(Set dst (MinReductionV dst src));
19952 match(Set dst (MaxReductionV dst src));
19953 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19954 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19955 ins_encode %{
19956 assert(UseAVX > 0, "sanity");
19957
19958 int opcode = this->ideal_Opcode();
19959 int vlen = Matcher::vector_length(this, $src);
19960 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19961 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19962 %}
19963 ins_pipe( pipe_slow );
19964 %}
19965
19966 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
19967 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19968 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19969 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19970 Matcher::vector_length(n->in(2)) == 2);
19971 match(Set dst (MinReductionV src1 src2));
19972 match(Set dst (MaxReductionV src1 src2));
19973 effect(TEMP dst, TEMP xtmp1);
19974 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
19975 ins_encode %{
19976 int opcode = this->ideal_Opcode();
19977 int vlen = Matcher::vector_length(this, $src2);
19978 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
19979 xnoreg, xnoreg, $xtmp1$$XMMRegister);
19980 %}
19981 ins_pipe( pipe_slow );
19982 %}
19983
19984 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
19985 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19986 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19987 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19988 Matcher::vector_length(n->in(2)) >= 4);
19989 match(Set dst (MinReductionV src1 src2));
19990 match(Set dst (MaxReductionV src1 src2));
19991 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19992 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
19993 ins_encode %{
19994 int opcode = this->ideal_Opcode();
19995 int vlen = Matcher::vector_length(this, $src2);
19996 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19997 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19998 %}
19999 ins_pipe( pipe_slow );
20000 %}
20001
20002
20003 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20004 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20005 Matcher::vector_length(n->in(2)) == 2);
20006 match(Set dst (MinReductionV dst src));
20007 match(Set dst (MaxReductionV dst src));
20008 effect(TEMP dst, TEMP xtmp1);
20009 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20010 ins_encode %{
20011 int opcode = this->ideal_Opcode();
20012 int vlen = Matcher::vector_length(this, $src);
20013 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20014 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20015 %}
20016 ins_pipe( pipe_slow );
20017 %}
20018
20019 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20020 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20021 Matcher::vector_length(n->in(2)) >= 4);
20022 match(Set dst (MinReductionV dst src));
20023 match(Set dst (MaxReductionV dst src));
20024 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20025 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20026 ins_encode %{
20027 int opcode = this->ideal_Opcode();
20028 int vlen = Matcher::vector_length(this, $src);
20029 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20030 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20031 %}
20032 ins_pipe( pipe_slow );
20033 %}
20034
20035 // ====================VECTOR ARITHMETIC=======================================
20036
20037 // --------------------------------- ADD --------------------------------------
20038
20039 // Bytes vector add
20040 instruct vaddB(vec dst, vec src) %{
20041 predicate(UseAVX == 0);
20042 match(Set dst (AddVB dst src));
20043 format %{ "paddb $dst,$src\t! add packedB" %}
20044 ins_encode %{
20045 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20046 %}
20047 ins_pipe( pipe_slow );
20048 %}
20049
20050 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20051 predicate(UseAVX > 0);
20052 match(Set dst (AddVB src1 src2));
20053 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20054 ins_encode %{
20055 int vlen_enc = vector_length_encoding(this);
20056 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20057 %}
20058 ins_pipe( pipe_slow );
20059 %}
20060
20061 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20062 predicate((UseAVX > 0) &&
20063 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20064 match(Set dst (AddVB src (LoadVector mem)));
20065 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20066 ins_encode %{
20067 int vlen_enc = vector_length_encoding(this);
20068 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20069 %}
20070 ins_pipe( pipe_slow );
20071 %}
20072
20073 // Shorts/Chars vector add
20074 instruct vaddS(vec dst, vec src) %{
20075 predicate(UseAVX == 0);
20076 match(Set dst (AddVS dst src));
20077 format %{ "paddw $dst,$src\t! add packedS" %}
20078 ins_encode %{
20079 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20080 %}
20081 ins_pipe( pipe_slow );
20082 %}
20083
20084 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20085 predicate(UseAVX > 0);
20086 match(Set dst (AddVS src1 src2));
20087 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20088 ins_encode %{
20089 int vlen_enc = vector_length_encoding(this);
20090 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20091 %}
20092 ins_pipe( pipe_slow );
20093 %}
20094
20095 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20096 predicate((UseAVX > 0) &&
20097 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20098 match(Set dst (AddVS src (LoadVector mem)));
20099 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20100 ins_encode %{
20101 int vlen_enc = vector_length_encoding(this);
20102 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20103 %}
20104 ins_pipe( pipe_slow );
20105 %}
20106
20107 // Integers vector add
20108 instruct vaddI(vec dst, vec src) %{
20109 predicate(UseAVX == 0);
20110 match(Set dst (AddVI dst src));
20111 format %{ "paddd $dst,$src\t! add packedI" %}
20112 ins_encode %{
20113 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20114 %}
20115 ins_pipe( pipe_slow );
20116 %}
20117
20118 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20119 predicate(UseAVX > 0);
20120 match(Set dst (AddVI src1 src2));
20121 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20122 ins_encode %{
20123 int vlen_enc = vector_length_encoding(this);
20124 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20125 %}
20126 ins_pipe( pipe_slow );
20127 %}
20128
20129
20130 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20131 predicate((UseAVX > 0) &&
20132 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20133 match(Set dst (AddVI src (LoadVector mem)));
20134 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20135 ins_encode %{
20136 int vlen_enc = vector_length_encoding(this);
20137 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20138 %}
20139 ins_pipe( pipe_slow );
20140 %}
20141
20142 // Longs vector add
20143 instruct vaddL(vec dst, vec src) %{
20144 predicate(UseAVX == 0);
20145 match(Set dst (AddVL dst src));
20146 format %{ "paddq $dst,$src\t! add packedL" %}
20147 ins_encode %{
20148 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20149 %}
20150 ins_pipe( pipe_slow );
20151 %}
20152
20153 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20154 predicate(UseAVX > 0);
20155 match(Set dst (AddVL src1 src2));
20156 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20157 ins_encode %{
20158 int vlen_enc = vector_length_encoding(this);
20159 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20160 %}
20161 ins_pipe( pipe_slow );
20162 %}
20163
20164 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20165 predicate((UseAVX > 0) &&
20166 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20167 match(Set dst (AddVL src (LoadVector mem)));
20168 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20169 ins_encode %{
20170 int vlen_enc = vector_length_encoding(this);
20171 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20172 %}
20173 ins_pipe( pipe_slow );
20174 %}
20175
20176 // Floats vector add
20177 instruct vaddF(vec dst, vec src) %{
20178 predicate(UseAVX == 0);
20179 match(Set dst (AddVF dst src));
20180 format %{ "addps $dst,$src\t! add packedF" %}
20181 ins_encode %{
20182 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20183 %}
20184 ins_pipe( pipe_slow );
20185 %}
20186
20187 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20188 predicate(UseAVX > 0);
20189 match(Set dst (AddVF src1 src2));
20190 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20191 ins_encode %{
20192 int vlen_enc = vector_length_encoding(this);
20193 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20194 %}
20195 ins_pipe( pipe_slow );
20196 %}
20197
20198 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20199 predicate((UseAVX > 0) &&
20200 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20201 match(Set dst (AddVF src (LoadVector mem)));
20202 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20203 ins_encode %{
20204 int vlen_enc = vector_length_encoding(this);
20205 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20206 %}
20207 ins_pipe( pipe_slow );
20208 %}
20209
20210 // Doubles vector add
20211 instruct vaddD(vec dst, vec src) %{
20212 predicate(UseAVX == 0);
20213 match(Set dst (AddVD dst src));
20214 format %{ "addpd $dst,$src\t! add packedD" %}
20215 ins_encode %{
20216 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20217 %}
20218 ins_pipe( pipe_slow );
20219 %}
20220
20221 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20222 predicate(UseAVX > 0);
20223 match(Set dst (AddVD src1 src2));
20224 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20225 ins_encode %{
20226 int vlen_enc = vector_length_encoding(this);
20227 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20228 %}
20229 ins_pipe( pipe_slow );
20230 %}
20231
20232 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20233 predicate((UseAVX > 0) &&
20234 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20235 match(Set dst (AddVD src (LoadVector mem)));
20236 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20237 ins_encode %{
20238 int vlen_enc = vector_length_encoding(this);
20239 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20240 %}
20241 ins_pipe( pipe_slow );
20242 %}
20243
20244 // --------------------------------- SUB --------------------------------------
20245
20246 // Bytes vector sub
20247 instruct vsubB(vec dst, vec src) %{
20248 predicate(UseAVX == 0);
20249 match(Set dst (SubVB dst src));
20250 format %{ "psubb $dst,$src\t! sub packedB" %}
20251 ins_encode %{
20252 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20253 %}
20254 ins_pipe( pipe_slow );
20255 %}
20256
20257 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20258 predicate(UseAVX > 0);
20259 match(Set dst (SubVB src1 src2));
20260 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20261 ins_encode %{
20262 int vlen_enc = vector_length_encoding(this);
20263 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20264 %}
20265 ins_pipe( pipe_slow );
20266 %}
20267
20268 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20269 predicate((UseAVX > 0) &&
20270 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20271 match(Set dst (SubVB src (LoadVector mem)));
20272 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20273 ins_encode %{
20274 int vlen_enc = vector_length_encoding(this);
20275 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20276 %}
20277 ins_pipe( pipe_slow );
20278 %}
20279
20280 // Shorts/Chars vector sub
20281 instruct vsubS(vec dst, vec src) %{
20282 predicate(UseAVX == 0);
20283 match(Set dst (SubVS dst src));
20284 format %{ "psubw $dst,$src\t! sub packedS" %}
20285 ins_encode %{
20286 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20287 %}
20288 ins_pipe( pipe_slow );
20289 %}
20290
20291
20292 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20293 predicate(UseAVX > 0);
20294 match(Set dst (SubVS src1 src2));
20295 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20296 ins_encode %{
20297 int vlen_enc = vector_length_encoding(this);
20298 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20299 %}
20300 ins_pipe( pipe_slow );
20301 %}
20302
20303 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20304 predicate((UseAVX > 0) &&
20305 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20306 match(Set dst (SubVS src (LoadVector mem)));
20307 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20308 ins_encode %{
20309 int vlen_enc = vector_length_encoding(this);
20310 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20311 %}
20312 ins_pipe( pipe_slow );
20313 %}
20314
20315 // Integers vector sub
20316 instruct vsubI(vec dst, vec src) %{
20317 predicate(UseAVX == 0);
20318 match(Set dst (SubVI dst src));
20319 format %{ "psubd $dst,$src\t! sub packedI" %}
20320 ins_encode %{
20321 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20322 %}
20323 ins_pipe( pipe_slow );
20324 %}
20325
20326 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20327 predicate(UseAVX > 0);
20328 match(Set dst (SubVI src1 src2));
20329 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20330 ins_encode %{
20331 int vlen_enc = vector_length_encoding(this);
20332 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20333 %}
20334 ins_pipe( pipe_slow );
20335 %}
20336
20337 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20338 predicate((UseAVX > 0) &&
20339 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20340 match(Set dst (SubVI src (LoadVector mem)));
20341 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20342 ins_encode %{
20343 int vlen_enc = vector_length_encoding(this);
20344 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20345 %}
20346 ins_pipe( pipe_slow );
20347 %}
20348
20349 // Longs vector sub
20350 instruct vsubL(vec dst, vec src) %{
20351 predicate(UseAVX == 0);
20352 match(Set dst (SubVL dst src));
20353 format %{ "psubq $dst,$src\t! sub packedL" %}
20354 ins_encode %{
20355 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20356 %}
20357 ins_pipe( pipe_slow );
20358 %}
20359
20360 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20361 predicate(UseAVX > 0);
20362 match(Set dst (SubVL src1 src2));
20363 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20364 ins_encode %{
20365 int vlen_enc = vector_length_encoding(this);
20366 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20367 %}
20368 ins_pipe( pipe_slow );
20369 %}
20370
20371
20372 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20373 predicate((UseAVX > 0) &&
20374 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20375 match(Set dst (SubVL src (LoadVector mem)));
20376 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20377 ins_encode %{
20378 int vlen_enc = vector_length_encoding(this);
20379 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20380 %}
20381 ins_pipe( pipe_slow );
20382 %}
20383
20384 // Floats vector sub
20385 instruct vsubF(vec dst, vec src) %{
20386 predicate(UseAVX == 0);
20387 match(Set dst (SubVF dst src));
20388 format %{ "subps $dst,$src\t! sub packedF" %}
20389 ins_encode %{
20390 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20391 %}
20392 ins_pipe( pipe_slow );
20393 %}
20394
20395 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20396 predicate(UseAVX > 0);
20397 match(Set dst (SubVF src1 src2));
20398 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20399 ins_encode %{
20400 int vlen_enc = vector_length_encoding(this);
20401 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20402 %}
20403 ins_pipe( pipe_slow );
20404 %}
20405
20406 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20407 predicate((UseAVX > 0) &&
20408 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20409 match(Set dst (SubVF src (LoadVector mem)));
20410 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20411 ins_encode %{
20412 int vlen_enc = vector_length_encoding(this);
20413 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20414 %}
20415 ins_pipe( pipe_slow );
20416 %}
20417
20418 // Doubles vector sub
20419 instruct vsubD(vec dst, vec src) %{
20420 predicate(UseAVX == 0);
20421 match(Set dst (SubVD dst src));
20422 format %{ "subpd $dst,$src\t! sub packedD" %}
20423 ins_encode %{
20424 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20425 %}
20426 ins_pipe( pipe_slow );
20427 %}
20428
20429 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20430 predicate(UseAVX > 0);
20431 match(Set dst (SubVD src1 src2));
20432 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20433 ins_encode %{
20434 int vlen_enc = vector_length_encoding(this);
20435 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20436 %}
20437 ins_pipe( pipe_slow );
20438 %}
20439
20440 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20441 predicate((UseAVX > 0) &&
20442 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20443 match(Set dst (SubVD src (LoadVector mem)));
20444 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20445 ins_encode %{
20446 int vlen_enc = vector_length_encoding(this);
20447 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20448 %}
20449 ins_pipe( pipe_slow );
20450 %}
20451
20452 // --------------------------------- MUL --------------------------------------
20453
20454 // Byte vector mul
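// There is no byte-granularity SIMD multiply on x86, so MulVB is synthesized
// from 16-bit multiplies. A scalar sketch of the identity relied on below
// (C-style; names are illustrative): the low byte of a product depends only
// on the low bytes of the inputs,
//   uint8_t mul_byte(uint8_t a, uint8_t b) {
//     int w = (int8_t)a * (int8_t)b;  // widened 16-bit (or wider) product
//     return (uint8_t)w;              // low byte == low byte of a * b
//   }
// so it does not matter that the widening (pmovsxbw) is signed; the shift and
// pack steps just re-extract that low byte for each element.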
20455 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20456 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20457 match(Set dst (MulVB src1 src2));
20458 effect(TEMP dst, TEMP xtmp);
20459 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20460 ins_encode %{
20461 assert(UseSSE > 3, "required");
20462 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20463 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20464 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20465 __ psllw($dst$$XMMRegister, 8);
20466 __ psrlw($dst$$XMMRegister, 8);
20467 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20468 %}
20469 ins_pipe( pipe_slow );
20470 %}
20471
20472 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20473 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20474 match(Set dst (MulVB src1 src2));
20475 effect(TEMP dst, TEMP xtmp);
20476 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20477 ins_encode %{
20478 assert(UseSSE > 3, "required");
20479 // Odd-index elements
20480 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20481 __ psrlw($dst$$XMMRegister, 8);
20482 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20483 __ psrlw($xtmp$$XMMRegister, 8);
20484 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20485 __ psllw($dst$$XMMRegister, 8);
20486 // Even-index elements
20487 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20488 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20489 __ psllw($xtmp$$XMMRegister, 8);
20490 __ psrlw($xtmp$$XMMRegister, 8);
20491 // Combine
20492 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20493 %}
20494 ins_pipe( pipe_slow );
20495 %}
20496
20497 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20498 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20499 match(Set dst (MulVB src1 src2));
20500 effect(TEMP xtmp1, TEMP xtmp2);
20501 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20502 ins_encode %{
20503 int vlen_enc = vector_length_encoding(this);
20504 // Odd-index elements
20505 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20506 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20507 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20508 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20509 // Even-index elements
20510 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20511 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20512 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20513 // Combine
20514 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20515 %}
20516 ins_pipe( pipe_slow );
20517 %}
20518
20519 // Shorts/Chars vector mul
20520 instruct vmulS(vec dst, vec src) %{
20521 predicate(UseAVX == 0);
20522 match(Set dst (MulVS dst src));
20523 format %{ "pmullw $dst,$src\t! mul packedS" %}
20524 ins_encode %{
20525 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20526 %}
20527 ins_pipe( pipe_slow );
20528 %}
20529
20530 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20531 predicate(UseAVX > 0);
20532 match(Set dst (MulVS src1 src2));
20533 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20534 ins_encode %{
20535 int vlen_enc = vector_length_encoding(this);
20536 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20537 %}
20538 ins_pipe( pipe_slow );
20539 %}
20540
20541 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20542 predicate((UseAVX > 0) &&
20543 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20544 match(Set dst (MulVS src (LoadVector mem)));
20545 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20546 ins_encode %{
20547 int vlen_enc = vector_length_encoding(this);
20548 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20549 %}
20550 ins_pipe( pipe_slow );
20551 %}
20552
20553 // Integers vector mul
20554 instruct vmulI(vec dst, vec src) %{
20555 predicate(UseAVX == 0);
20556 match(Set dst (MulVI dst src));
20557 format %{ "pmulld $dst,$src\t! mul packedI" %}
20558 ins_encode %{
20559 assert(UseSSE > 3, "required");
20560 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20561 %}
20562 ins_pipe( pipe_slow );
20563 %}
20564
20565 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20566 predicate(UseAVX > 0);
20567 match(Set dst (MulVI src1 src2));
20568 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20569 ins_encode %{
20570 int vlen_enc = vector_length_encoding(this);
20571 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20572 %}
20573 ins_pipe( pipe_slow );
20574 %}
20575
20576 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20577 predicate((UseAVX > 0) &&
20578 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20579 match(Set dst (MulVI src (LoadVector mem)));
20580 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20581 ins_encode %{
20582 int vlen_enc = vector_length_encoding(this);
20583 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20584 %}
20585 ins_pipe( pipe_slow );
20586 %}
20587
20588 // Longs vector mul
20589 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20590 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20591 VM_Version::supports_avx512dq()) ||
20592 VM_Version::supports_avx512vldq());
20593 match(Set dst (MulVL src1 src2));
20594 ins_cost(500);
20595 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20596 ins_encode %{
20597 assert(UseAVX > 2, "required");
20598 int vlen_enc = vector_length_encoding(this);
20599 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20600 %}
20601 ins_pipe( pipe_slow );
20602 %}
20603
20604 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20605 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20606 VM_Version::supports_avx512dq()) ||
20607 (Matcher::vector_length_in_bytes(n) > 8 &&
20608 VM_Version::supports_avx512vldq()));
20609 match(Set dst (MulVL src (LoadVector mem)));
20610 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20611 ins_cost(500);
20612 ins_encode %{
20613 assert(UseAVX > 2, "required");
20614 int vlen_enc = vector_length_encoding(this);
20615 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20616 %}
20617 ins_pipe( pipe_slow );
20618 %}
20619
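// When no native 64-bit lane multiply (evpmullq) is available, the rules
// below assemble it from 32-bit multiplies. A scalar sketch of the
// decomposition (C-style; names are illustrative), with each 64-bit lane
// value split as a = a_hi:a_lo and b = b_hi:b_lo:
//   uint64_t lo_lo = (uint64_t)(uint32_t)a * (uint32_t)b;   // pmuludq
//   uint32_t cross = (uint32_t)(a >> 32) * (uint32_t)b      // pmulld terms
//                  + (uint32_t)a * (uint32_t)(b >> 32);
//   uint64_t prod  = lo_lo + ((uint64_t)cross << 32);       // == a*b mod 2^64
// The a_hi * b_hi term never appears: all of its bits land above bit 63.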
20620 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20621 predicate(UseAVX == 0);
20622 match(Set dst (MulVL src1 src2));
20623 ins_cost(500);
20624 effect(TEMP dst, TEMP xtmp);
20625 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20626 ins_encode %{
20627 assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only their lower 32 bits are needed
20629 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20630 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20631 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20632 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20633 __ psllq($dst$$XMMRegister, 32);
20634 // Get the lo-lo products
20635 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20636 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20637 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20638 %}
20639 ins_pipe( pipe_slow );
20640 %}
20641
20642 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20643 predicate(UseAVX > 0 &&
20644 ((Matcher::vector_length_in_bytes(n) == 64 &&
20645 !VM_Version::supports_avx512dq()) ||
20646 (Matcher::vector_length_in_bytes(n) < 64 &&
20647 !VM_Version::supports_avx512vldq())));
20648 match(Set dst (MulVL src1 src2));
20649 effect(TEMP xtmp1, TEMP xtmp2);
20650 ins_cost(500);
20651 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20652 ins_encode %{
20653 int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only their lower 32 bits are needed
20655 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20656 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20657 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20658 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20659 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20660 // Get the lo-lo products
20661 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20662 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20663 %}
20664 ins_pipe( pipe_slow );
20665 %}
20666
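// When the IR can prove that both MulVL inputs are zero-extended
// (has_uint_inputs) or sign-extended (has_int_inputs) 32-bit values, the
// cross terms in the decomposition above vanish and a single widening
// 32x32->64 multiply (vpmuludq/vpmuldq, matched by the two rules below)
// already yields the exact product; their lower ins_cost(100) steers the
// matcher to them over the generic forms.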
20667 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20668 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20669 match(Set dst (MulVL src1 src2));
20670 ins_cost(100);
20671 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20672 ins_encode %{
20673 int vlen_enc = vector_length_encoding(this);
20674 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20675 %}
20676 ins_pipe( pipe_slow );
20677 %}
20678
20679 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20680 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20681 match(Set dst (MulVL src1 src2));
20682 ins_cost(100);
20683 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20684 ins_encode %{
20685 int vlen_enc = vector_length_encoding(this);
20686 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20687 %}
20688 ins_pipe( pipe_slow );
20689 %}
20690
20691 // Floats vector mul
20692 instruct vmulF(vec dst, vec src) %{
20693 predicate(UseAVX == 0);
20694 match(Set dst (MulVF dst src));
20695 format %{ "mulps $dst,$src\t! mul packedF" %}
20696 ins_encode %{
20697 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20698 %}
20699 ins_pipe( pipe_slow );
20700 %}
20701
20702 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20703 predicate(UseAVX > 0);
20704 match(Set dst (MulVF src1 src2));
20705 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20706 ins_encode %{
20707 int vlen_enc = vector_length_encoding(this);
20708 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20709 %}
20710 ins_pipe( pipe_slow );
20711 %}
20712
20713 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20714 predicate((UseAVX > 0) &&
20715 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20716 match(Set dst (MulVF src (LoadVector mem)));
20717 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20718 ins_encode %{
20719 int vlen_enc = vector_length_encoding(this);
20720 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20721 %}
20722 ins_pipe( pipe_slow );
20723 %}
20724
20725 // Doubles vector mul
20726 instruct vmulD(vec dst, vec src) %{
20727 predicate(UseAVX == 0);
20728 match(Set dst (MulVD dst src));
20729 format %{ "mulpd $dst,$src\t! mul packedD" %}
20730 ins_encode %{
20731 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20732 %}
20733 ins_pipe( pipe_slow );
20734 %}
20735
20736 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20737 predicate(UseAVX > 0);
20738 match(Set dst (MulVD src1 src2));
20739 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20740 ins_encode %{
20741 int vlen_enc = vector_length_encoding(this);
20742 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20743 %}
20744 ins_pipe( pipe_slow );
20745 %}
20746
20747 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20748 predicate((UseAVX > 0) &&
20749 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20750 match(Set dst (MulVD src (LoadVector mem)));
20751 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20752 ins_encode %{
20753 int vlen_enc = vector_length_encoding(this);
20754 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20755 %}
20756 ins_pipe( pipe_slow );
20757 %}
20758
20759 // --------------------------------- DIV --------------------------------------
20760
20761 // Floats vector div
20762 instruct vdivF(vec dst, vec src) %{
20763 predicate(UseAVX == 0);
20764 match(Set dst (DivVF dst src));
20765 format %{ "divps $dst,$src\t! div packedF" %}
20766 ins_encode %{
20767 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20768 %}
20769 ins_pipe( pipe_slow );
20770 %}
20771
20772 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20773 predicate(UseAVX > 0);
20774 match(Set dst (DivVF src1 src2));
20775 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20776 ins_encode %{
20777 int vlen_enc = vector_length_encoding(this);
20778 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20779 %}
20780 ins_pipe( pipe_slow );
20781 %}
20782
20783 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20784 predicate((UseAVX > 0) &&
20785 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20786 match(Set dst (DivVF src (LoadVector mem)));
20787 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20788 ins_encode %{
20789 int vlen_enc = vector_length_encoding(this);
20790 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20791 %}
20792 ins_pipe( pipe_slow );
20793 %}
20794
20795 // Doubles vector div
20796 instruct vdivD(vec dst, vec src) %{
20797 predicate(UseAVX == 0);
20798 match(Set dst (DivVD dst src));
20799 format %{ "divpd $dst,$src\t! div packedD" %}
20800 ins_encode %{
20801 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20802 %}
20803 ins_pipe( pipe_slow );
20804 %}
20805
20806 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20807 predicate(UseAVX > 0);
20808 match(Set dst (DivVD src1 src2));
20809 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20810 ins_encode %{
20811 int vlen_enc = vector_length_encoding(this);
20812 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20813 %}
20814 ins_pipe( pipe_slow );
20815 %}
20816
20817 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20818 predicate((UseAVX > 0) &&
20819 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20820 match(Set dst (DivVD src (LoadVector mem)));
20821 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20822 ins_encode %{
20823 int vlen_enc = vector_length_encoding(this);
20824 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20825 %}
20826 ins_pipe( pipe_slow );
20827 %}
20828
20829 // ------------------------------ MinMax ---------------------------------------
20830
20831 // Byte, Short, Int vector Min/Max
20832 instruct minmax_reg_sse(vec dst, vec src) %{
20833 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20834 UseAVX == 0);
20835 match(Set dst (MinV dst src));
20836 match(Set dst (MaxV dst src));
20837 format %{ "vector_minmax $dst,$src\t! " %}
20838 ins_encode %{
20839 assert(UseSSE >= 4, "required");
20840
20841 int opcode = this->ideal_Opcode();
20842 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20843 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20844 %}
20845 ins_pipe( pipe_slow );
20846 %}
20847
20848 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20849 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20850 UseAVX > 0);
20851 match(Set dst (MinV src1 src2));
20852 match(Set dst (MaxV src1 src2));
20853 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20854 ins_encode %{
20855 int opcode = this->ideal_Opcode();
20856 int vlen_enc = vector_length_encoding(this);
20857 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20858
20859 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20860 %}
20861 ins_pipe( pipe_slow );
20862 %}
20863
20864 // Long vector Min/Max
20865 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20866 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20867 UseAVX == 0);
20868 match(Set dst (MinV dst src));
20869 match(Set dst (MaxV src dst));
20870 effect(TEMP dst, TEMP tmp);
20871 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20872 ins_encode %{
20873 assert(UseSSE >= 4, "required");
20874
20875 int opcode = this->ideal_Opcode();
20876 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20877 assert(elem_bt == T_LONG, "sanity");
20878
20879 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20880 %}
20881 ins_pipe( pipe_slow );
20882 %}
20883
20884 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20885 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20886 UseAVX > 0 && !VM_Version::supports_avx512vl());
20887 match(Set dst (MinV src1 src2));
20888 match(Set dst (MaxV src1 src2));
20889 effect(TEMP dst);
20890 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20891 ins_encode %{
20892 int vlen_enc = vector_length_encoding(this);
20893 int opcode = this->ideal_Opcode();
20894 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20895 assert(elem_bt == T_LONG, "sanity");
20896
20897 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20898 %}
20899 ins_pipe( pipe_slow );
20900 %}
20901
20902 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20903 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20904 Matcher::vector_element_basic_type(n) == T_LONG);
20905 match(Set dst (MinV src1 src2));
20906 match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20908 ins_encode %{
20909 assert(UseAVX > 2, "required");
20910
20911 int vlen_enc = vector_length_encoding(this);
20912 int opcode = this->ideal_Opcode();
20913 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20914 assert(elem_bt == T_LONG, "sanity");
20915
20916 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20917 %}
20918 ins_pipe( pipe_slow );
20919 %}
20920
20921 // Float/Double vector Min/Max
20922 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20923 predicate(VM_Version::supports_avx10_2() &&
20924 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20925 match(Set dst (MinV a b));
20926 match(Set dst (MaxV a b));
20927 format %{ "vector_minmaxFP $dst, $a, $b" %}
20928 ins_encode %{
20929 int vlen_enc = vector_length_encoding(this);
20930 int opcode = this->ideal_Opcode();
20931 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20932 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20933 %}
20934 ins_pipe( pipe_slow );
20935 %}
20936
20937 // Float/Double vector Min/Max
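// The blend-based sequences below are needed because vminps/vminpd (and the
// max forms) do not match Java semantics directly: when either input is NaN,
// or for the (-0.0, +0.0) pair, the hardware instructions simply return the
// second operand, whereas Java requires NaN to propagate and -0.0 to order
// below +0.0. The TEMP vectors hold the intermediate compares and blends
// used to patch up those cases.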
20938 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20939 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20940 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20941 UseAVX > 0);
20942 match(Set dst (MinV a b));
20943 match(Set dst (MaxV a b));
20944 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20945 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20946 ins_encode %{
20947 assert(UseAVX > 0, "required");
20948
20949 int opcode = this->ideal_Opcode();
20950 int vlen_enc = vector_length_encoding(this);
20951 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20952
20953 __ vminmax_fp(opcode, elem_bt,
20954 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20956 %}
20957 ins_pipe( pipe_slow );
20958 %}
20959
20960 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20961 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20962 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20963 match(Set dst (MinV a b));
20964 match(Set dst (MaxV a b));
20965 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20966 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
20967 ins_encode %{
20968 assert(UseAVX > 2, "required");
20969
20970 int opcode = this->ideal_Opcode();
20971 int vlen_enc = vector_length_encoding(this);
20972 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20973
20974 __ evminmax_fp(opcode, elem_bt,
20975 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vlen_enc);
20977 %}
20978 ins_pipe( pipe_slow );
20979 %}
20980
20981 // ------------------------------ Unsigned vector Min/Max ----------------------
20982
20983 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20984 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20985 match(Set dst (UMinV a b));
20986 match(Set dst (UMaxV a b));
20987 format %{ "vector_uminmax $dst,$a,$b\t!" %}
20988 ins_encode %{
20989 int opcode = this->ideal_Opcode();
20990 int vlen_enc = vector_length_encoding(this);
20991 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20992 assert(is_integral_type(elem_bt), "");
20993 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20994 %}
20995 ins_pipe( pipe_slow );
20996 %}
20997
20998 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20999 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21000 match(Set dst (UMinV a (LoadVector b)));
21001 match(Set dst (UMaxV a (LoadVector b)));
21002 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21003 ins_encode %{
21004 int opcode = this->ideal_Opcode();
21005 int vlen_enc = vector_length_encoding(this);
21006 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21007 assert(is_integral_type(elem_bt), "");
21008 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21009 %}
21010 ins_pipe( pipe_slow );
21011 %}
21012
21013 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21014 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21015 match(Set dst (UMinV a b));
21016 match(Set dst (UMaxV a b));
21017 effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21019 ins_encode %{
21020 int opcode = this->ideal_Opcode();
21021 int vlen_enc = vector_length_encoding(this);
21022 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21023 %}
21024 ins_pipe( pipe_slow );
21025 %}
21026
21027 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21028 match(Set dst (UMinV (Binary dst src2) mask));
21029 match(Set dst (UMaxV (Binary dst src2) mask));
21030 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21031 ins_encode %{
21032 int vlen_enc = vector_length_encoding(this);
21033 BasicType bt = Matcher::vector_element_basic_type(this);
21034 int opc = this->ideal_Opcode();
21035 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21036 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21037 %}
21038 ins_pipe( pipe_slow );
21039 %}
21040
21041 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21042 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21043 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21044 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21045 ins_encode %{
21046 int vlen_enc = vector_length_encoding(this);
21047 BasicType bt = Matcher::vector_element_basic_type(this);
21048 int opc = this->ideal_Opcode();
21049 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21050 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21051 %}
21052 ins_pipe( pipe_slow );
21053 %}
21054
21055 // --------------------------------- Signum/CopySign ---------------------------
21056
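// Java Math.signum returns 1.0 for a positive argument, -1.0 for a negative
// one, and the argument itself for +/-0.0 and NaN; the $zero and $one
// operands supply the constants the signum_fp/vector_signum macros blend from.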
21057 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21058 match(Set dst (SignumF dst (Binary zero one)));
21059 effect(KILL cr);
21060 format %{ "signumF $dst, $dst" %}
21061 ins_encode %{
21062 int opcode = this->ideal_Opcode();
21063 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21064 %}
21065 ins_pipe( pipe_slow );
21066 %}
21067
21068 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21069 match(Set dst (SignumD dst (Binary zero one)));
21070 effect(KILL cr);
21071 format %{ "signumD $dst, $dst" %}
21072 ins_encode %{
21073 int opcode = this->ideal_Opcode();
21074 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21075 %}
21076 ins_pipe( pipe_slow );
21077 %}
21078
21079 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21080 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21081 match(Set dst (SignumVF src (Binary zero one)));
21082 match(Set dst (SignumVD src (Binary zero one)));
21083 effect(TEMP dst, TEMP xtmp1);
21084 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21085 ins_encode %{
21086 int opcode = this->ideal_Opcode();
21087 int vec_enc = vector_length_encoding(this);
21088 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21089 $xtmp1$$XMMRegister, vec_enc);
21090 %}
21091 ins_pipe( pipe_slow );
21092 %}
21093
21094 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21095 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21096 match(Set dst (SignumVF src (Binary zero one)));
21097 match(Set dst (SignumVD src (Binary zero one)));
21098 effect(TEMP dst, TEMP ktmp1);
21099 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21100 ins_encode %{
21101 int opcode = this->ideal_Opcode();
21102 int vec_enc = vector_length_encoding(this);
21103 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21104 $ktmp1$$KRegister, vec_enc);
21105 %}
21106 ins_pipe( pipe_slow );
21107 %}
21108
21109 // ---------------------------------------
// For copySign use 0xE4 as the imm8 truth-table selector for vpternlog
21111 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21112 // C (xmm2) is set to 0x7FFFFFFF
21113 // Wherever xmm2 is 0, we want to pick from B (sign)
21114 // Wherever xmm2 is 1, we want to pick from A (src)
21115 //
21116 // A B C Result
21117 // 0 0 0 0
21118 // 0 0 1 0
21119 // 0 1 0 1
21120 // 0 1 1 0
21121 // 1 0 0 0
21122 // 1 0 1 1
21123 // 1 1 0 1
21124 // 1 1 1 1
21125 //
// Result going from high bit to low bit is binary 11100100 = 0xE4
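//
// A worked example (values are illustrative):
//   A = dst = -1.5f = 0xBFC00000 (magnitude source)
//   B = src =  2.0f = 0x40000000 (sign source)
//   C = mask       = 0x7FFFFFFF
// The sign bit (where C is 0) comes from B -> 0; all other bits (where C is
// 1) come from A -> 0x3FC00000. Result = 0x3FC00000 = 1.5f, i.e. the
// magnitude of dst with the sign of src.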
21127 // ---------------------------------------
21128
21129 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21130 match(Set dst (CopySignF dst src));
21131 effect(TEMP tmp1, TEMP tmp2);
21132 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21133 ins_encode %{
21134 __ movl($tmp2$$Register, 0x7FFFFFFF);
21135 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21136 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21137 %}
21138 ins_pipe( pipe_slow );
21139 %}
21140
21141 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21142 match(Set dst (CopySignD dst (Binary src zero)));
21143 ins_cost(100);
21144 effect(TEMP tmp1, TEMP tmp2);
21145 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21146 ins_encode %{
21147 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21148 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21149 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21150 %}
21151 ins_pipe( pipe_slow );
21152 %}
21153
21154 //----------------------------- CompressBits/ExpandBits ------------------------
21155
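// PEXT gathers the bits of src selected by mask into the contiguous low bits
// of the destination; PDEP is its inverse, scattering the low bits of src to
// the positions where mask is set. A small worked example (values are
// illustrative):
//   pext(src = 0b1110, mask = 0b1010) -> 0b11    (bits 3 and 1 of src)
//   pdep(src = 0b11,   mask = 0b1010) -> 0b1010  (low bits sent to bits 3 and 1)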
21156 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21157 predicate(n->bottom_type()->isa_int());
21158 match(Set dst (CompressBits src mask));
21159 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21160 ins_encode %{
21161 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21162 %}
21163 ins_pipe( pipe_slow );
21164 %}
21165
21166 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21167 predicate(n->bottom_type()->isa_int());
21168 match(Set dst (ExpandBits src mask));
21169 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21170 ins_encode %{
21171 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21172 %}
21173 ins_pipe( pipe_slow );
21174 %}
21175
21176 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21177 predicate(n->bottom_type()->isa_int());
21178 match(Set dst (CompressBits src (LoadI mask)));
21179 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21180 ins_encode %{
21181 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21182 %}
21183 ins_pipe( pipe_slow );
21184 %}
21185
21186 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21187 predicate(n->bottom_type()->isa_int());
21188 match(Set dst (ExpandBits src (LoadI mask)));
21189 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21190 ins_encode %{
21191 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21192 %}
21193 ins_pipe( pipe_slow );
21194 %}
21195
21196 // --------------------------------- Sqrt --------------------------------------
21197
21198 instruct vsqrtF_reg(vec dst, vec src) %{
21199 match(Set dst (SqrtVF src));
21200 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21201 ins_encode %{
21202 assert(UseAVX > 0, "required");
21203 int vlen_enc = vector_length_encoding(this);
21204 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21205 %}
21206 ins_pipe( pipe_slow );
21207 %}
21208
21209 instruct vsqrtF_mem(vec dst, memory mem) %{
21210 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21211 match(Set dst (SqrtVF (LoadVector mem)));
21212 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21213 ins_encode %{
21214 assert(UseAVX > 0, "required");
21215 int vlen_enc = vector_length_encoding(this);
21216 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21217 %}
21218 ins_pipe( pipe_slow );
21219 %}
21220
21221 // Floating point vector sqrt
21222 instruct vsqrtD_reg(vec dst, vec src) %{
21223 match(Set dst (SqrtVD src));
21224 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21225 ins_encode %{
21226 assert(UseAVX > 0, "required");
21227 int vlen_enc = vector_length_encoding(this);
21228 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21229 %}
21230 ins_pipe( pipe_slow );
21231 %}
21232
21233 instruct vsqrtD_mem(vec dst, memory mem) %{
21234 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21235 match(Set dst (SqrtVD (LoadVector mem)));
21236 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21237 ins_encode %{
21238 assert(UseAVX > 0, "required");
21239 int vlen_enc = vector_length_encoding(this);
21240 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21241 %}
21242 ins_pipe( pipe_slow );
21243 %}
21244
21245 // ------------------------------ Shift ---------------------------------------
21246
21247 // Left and right shift count vectors are the same on x86
21248 // (only lowest bits of xmm reg are used for count).
21249 instruct vshiftcnt(vec dst, rRegI cnt) %{
21250 match(Set dst (LShiftCntV cnt));
21251 match(Set dst (RShiftCntV cnt));
21252 format %{ "movdl $dst,$cnt\t! load shift count" %}
21253 ins_encode %{
21254 __ movdl($dst$$XMMRegister, $cnt$$Register);
21255 %}
21256 ins_pipe( pipe_slow );
21257 %}
21258
21259 // Byte vector shift
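// x86 has no byte-granularity shift instructions (the narrowest packed
// shifts, e.g. psllw/psrlw/psraw, operate on 16-bit lanes), so the rules
// below widen bytes to shorts, shift, mask off the bits that bled in from
// the neighboring byte, and pack the low bytes back down.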
21260 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21261 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21262 match(Set dst ( LShiftVB src shift));
21263 match(Set dst ( RShiftVB src shift));
21264 match(Set dst (URShiftVB src shift));
21265 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21266 format %{"vector_byte_shift $dst,$src,$shift" %}
21267 ins_encode %{
21268 assert(UseSSE > 3, "required");
21269 int opcode = this->ideal_Opcode();
21270 bool sign = (opcode != Op_URShiftVB);
21271 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21272 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21273 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21274 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21275 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21276 %}
21277 ins_pipe( pipe_slow );
21278 %}
21279
21280 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21281 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21282 UseAVX <= 1);
21283 match(Set dst ( LShiftVB src shift));
21284 match(Set dst ( RShiftVB src shift));
21285 match(Set dst (URShiftVB src shift));
21286 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21287 format %{"vector_byte_shift $dst,$src,$shift" %}
21288 ins_encode %{
21289 assert(UseSSE > 3, "required");
21290 int opcode = this->ideal_Opcode();
21291 bool sign = (opcode != Op_URShiftVB);
21292 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21293 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21294 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21295 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21296 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21297 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21298 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21299 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21300 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21301 %}
21302 ins_pipe( pipe_slow );
21303 %}
21304
21305 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21306 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21307 UseAVX > 1);
21308 match(Set dst ( LShiftVB src shift));
21309 match(Set dst ( RShiftVB src shift));
21310 match(Set dst (URShiftVB src shift));
21311 effect(TEMP dst, TEMP tmp);
21312 format %{"vector_byte_shift $dst,$src,$shift" %}
21313 ins_encode %{
21314 int opcode = this->ideal_Opcode();
21315 bool sign = (opcode != Op_URShiftVB);
21316 int vlen_enc = Assembler::AVX_256bit;
21317 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21318 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21319 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21320 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21321 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21322 %}
21323 ins_pipe( pipe_slow );
21324 %}
21325
21326 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21327 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21328 match(Set dst ( LShiftVB src shift));
21329 match(Set dst ( RShiftVB src shift));
21330 match(Set dst (URShiftVB src shift));
21331 effect(TEMP dst, TEMP tmp);
21332 format %{ "vector_byte_shift $dst,$src,$shift" %}
21333 ins_encode %{
21334 assert(UseAVX > 1, "required");
21335 int opcode = this->ideal_Opcode();
21336 bool sign = (opcode != Op_URShiftVB);
21337 int vlen_enc = Assembler::AVX_256bit;
21338 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21339 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21340 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21341 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21342 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21343 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21344 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21345 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21346 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21347 %}
21348 ins_pipe( pipe_slow );
21349 %}
21350
21351 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21352 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21353 match(Set dst ( LShiftVB src shift));
21354 match(Set dst ( RShiftVB src shift));
21355 match(Set dst (URShiftVB src shift));
21356 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21357 format %{ "vector_byte_shift $dst,$src,$shift" %}
21358 ins_encode %{
21359 assert(UseAVX > 2, "required");
21360 int opcode = this->ideal_Opcode();
21361 bool sign = (opcode != Op_URShiftVB);
21362 int vlen_enc = Assembler::AVX_512bit;
21363 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21364 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21365 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21366 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21367 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21368 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21369 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21370 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21371 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21372 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21373 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21374 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21375 %}
21376 ins_pipe( pipe_slow );
21377 %}
21378
21379 // A logical right shift of a short vector would produce an incorrect Java
21380 // result for negative data, because Java code converts the short value to an
21381 // int with sign extension before shifting. Char vectors are fine, since
21382 // chars are unsigned values.
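// For example, with short s = -1 (0xFFFF): Java evaluates s >>> 2 as
// ((int) s) >>> 2 == 0x3FFFFFFF, whose low 16 bits are 0xFFFF, while a true
// 16-bit logical shift would give 0x3FFF. Hence only char (unsigned) data
// can safely use the packed word shift for >>>.
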
21383 // Shorts/Chars vector shift
21384 instruct vshiftS(vec dst, vec src, vec shift) %{
21385 predicate(!n->as_ShiftV()->is_var_shift());
21386 match(Set dst ( LShiftVS src shift));
21387 match(Set dst ( RShiftVS src shift));
21388 match(Set dst (URShiftVS src shift));
21389 effect(TEMP dst, USE src, USE shift);
21390 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21391 ins_encode %{
21392 int opcode = this->ideal_Opcode();
21393 if (UseAVX > 0) {
21394 int vlen_enc = vector_length_encoding(this);
21395 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21396 } else {
21397 int vlen = Matcher::vector_length(this);
21398 if (vlen == 2) {
21399 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21400 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21401 } else if (vlen == 4) {
21402 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21403 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21404 } else {
21405 assert(vlen == 8, "sanity");
21406 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21407 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21408 }
21409 }
21410 %}
21411 ins_pipe( pipe_slow );
21412 %}
21413
21414 // Integers vector shift
21415 instruct vshiftI(vec dst, vec src, vec shift) %{
21416 predicate(!n->as_ShiftV()->is_var_shift());
21417 match(Set dst ( LShiftVI src shift));
21418 match(Set dst ( RShiftVI src shift));
21419 match(Set dst (URShiftVI src shift));
21420 effect(TEMP dst, USE src, USE shift);
21421 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21422 ins_encode %{
21423 int opcode = this->ideal_Opcode();
21424 if (UseAVX > 0) {
21425 int vlen_enc = vector_length_encoding(this);
21426 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21427 } else {
21428 int vlen = Matcher::vector_length(this);
21429 if (vlen == 2) {
21430 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21431 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21432 } else {
21433 assert(vlen == 4, "sanity");
21434 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21435 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21436 }
21437 }
21438 %}
21439 ins_pipe( pipe_slow );
21440 %}
21441
21442 // Integers vector constant shift
21443 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21444 match(Set dst (LShiftVI src (LShiftCntV shift)));
21445 match(Set dst (RShiftVI src (RShiftCntV shift)));
21446 match(Set dst (URShiftVI src (RShiftCntV shift)));
21447 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21448 ins_encode %{
21449 int opcode = this->ideal_Opcode();
21450 if (UseAVX > 0) {
21451 int vector_len = vector_length_encoding(this);
21452 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21453 } else {
21454 int vlen = Matcher::vector_length(this);
21455 if (vlen == 2) {
21456 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21457 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21458 } else {
21459 assert(vlen == 4, "sanity");
21460 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21461 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21462 }
21463 }
21464 %}
21465 ins_pipe( pipe_slow );
21466 %}
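
// A sketch (hypothetical helper) of Java source that takes the constant-shift
// form above; the count reaches the matcher as an immI8 through LShiftCntV:
//
//   static void shl3(int[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] <<= 3;   // LShiftVI(src, LShiftCntV(3)) -> vshiftd_imm
//     }
//   }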
21467
21468 // Longs vector shift
21469 instruct vshiftL(vec dst, vec src, vec shift) %{
21470 predicate(!n->as_ShiftV()->is_var_shift());
21471 match(Set dst ( LShiftVL src shift));
21472 match(Set dst (URShiftVL src shift));
21473 effect(TEMP dst, USE src, USE shift);
21474 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21475 ins_encode %{
21476 int opcode = this->ideal_Opcode();
21477 if (UseAVX > 0) {
21478 int vlen_enc = vector_length_encoding(this);
21479 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21480 } else {
21481 assert(Matcher::vector_length(this) == 2, "sanity");
21482 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21483 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21484 }
21485 %}
21486 ins_pipe( pipe_slow );
21487 %}
21488
21489 // Longs vector constant shift
21490 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21491 match(Set dst (LShiftVL src (LShiftCntV shift)));
21492 match(Set dst (URShiftVL src (RShiftCntV shift)));
21493 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21494 ins_encode %{
21495 int opcode = this->ideal_Opcode();
21496 if (UseAVX > 0) {
21497 int vector_len = vector_length_encoding(this);
21498 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21499 } else {
21500 assert(Matcher::vector_length(this) == 2, "sanity");
21501 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21502 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21503 }
21504 %}
21505 ins_pipe( pipe_slow );
21506 %}
21507
21508 // ------------------- Arithmetic Right Shift ---------------------------------
21509 // Long vector arithmetic right shift
21510 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21511 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21512 match(Set dst (RShiftVL src shift));
21513 effect(TEMP dst, TEMP tmp);
21514 format %{ "vshiftq $dst,$src,$shift" %}
21515 ins_encode %{
21516 uint vlen = Matcher::vector_length(this);
21517 if (vlen == 2) {
21518 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21519 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21520 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21521 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21522 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21523 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21524 } else {
21525 assert(vlen == 4, "sanity");
21526 assert(UseAVX > 1, "required");
21527 int vlen_enc = Assembler::AVX_256bit;
21528 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21529 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21530 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21531 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21532 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21533 }
21534 %}
21535 ins_pipe( pipe_slow );
21536 %}
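
// Before AVX-512 (evpsraq) there is no packed arithmetic right shift for
// longs, so the rule above builds one from the logical shift: shift a
// sign-bit mask by the same amount, then xor/subtract it to re-propagate the
// sign. A scalar Java model of the identity used (a sketch):
//
//   static long sraLane(long x, int s) {
//     long m = Long.MIN_VALUE >>> s;   // vector_long_sign_mask >>> s
//     return ((x >>> s) ^ m) - m;      // psrlq, pxor, psubq
//   }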
21537
21538 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21539 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21540 match(Set dst (RShiftVL src shift));
21541 format %{ "vshiftq $dst,$src,$shift" %}
21542 ins_encode %{
21543 int vlen_enc = vector_length_encoding(this);
21544 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21545 %}
21546 ins_pipe( pipe_slow );
21547 %}
21548
21549 // ------------------- Variable Shift -----------------------------
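
// A variable shift takes a per-lane count vector instead of a broadcast
// scalar (is_var_shift() in the predicates below). A sketch using the
// incubating jdk.incubator.vector API, which produces such shapes directly:
//
//   var v = ByteVector.fromArray(ByteVector.SPECIES_128, a, i);
//   var s = ByteVector.fromArray(ByteVector.SPECIES_128, cnt, i);
//   v.lanewise(VectorOperators.LSHL, s).intoArray(a, i);  // variable LShiftVB
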
21550 // Byte variable shift
21551 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21552 predicate(Matcher::vector_length(n) <= 8 &&
21553 n->as_ShiftV()->is_var_shift() &&
21554 !VM_Version::supports_avx512bw());
21555 match(Set dst ( LShiftVB src shift));
21556 match(Set dst ( RShiftVB src shift));
21557 match(Set dst (URShiftVB src shift));
21558 effect(TEMP dst, TEMP vtmp);
21559 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21560 ins_encode %{
21561 assert(UseAVX >= 2, "required");
21562
21563 int opcode = this->ideal_Opcode();
21564 int vlen_enc = Assembler::AVX_128bit;
21565 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21566 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21567 %}
21568 ins_pipe( pipe_slow );
21569 %}
21570
21571 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21572 predicate(Matcher::vector_length(n) == 16 &&
21573 n->as_ShiftV()->is_var_shift() &&
21574 !VM_Version::supports_avx512bw());
21575 match(Set dst ( LShiftVB src shift));
21576 match(Set dst ( RShiftVB src shift));
21577 match(Set dst (URShiftVB src shift));
21578 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21579 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21580 ins_encode %{
21581 assert(UseAVX >= 2, "required");
21582
21583 int opcode = this->ideal_Opcode();
21584 int vlen_enc = Assembler::AVX_128bit;
21585 // Shift lower half and get word result in dst
21586 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21587
21588 // Shift upper half and get word result in vtmp1
21589 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21590 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21591 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21592
21593 // Merge and down convert the two word results to byte in dst
21594 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21595 %}
21596 ins_pipe( pipe_slow );
21597 %}
21598
21599 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21600 predicate(Matcher::vector_length(n) == 32 &&
21601 n->as_ShiftV()->is_var_shift() &&
21602 !VM_Version::supports_avx512bw());
21603 match(Set dst ( LShiftVB src shift));
21604 match(Set dst ( RShiftVB src shift));
21605 match(Set dst (URShiftVB src shift));
21606 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21607 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21608 ins_encode %{
21609 assert(UseAVX >= 2, "required");
21610
21611 int opcode = this->ideal_Opcode();
21612 int vlen_enc = Assembler::AVX_128bit;
21613 // Process lower 128 bits and get result in dst
21614 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21615 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21616 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21617 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21618 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21619
21620 // Process higher 128 bits and get result in vtmp3
21621 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21622 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21623 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21624 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21625 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21626 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21627 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21628
21629 // Merge the two results in dst
21630 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21631 %}
21632 ins_pipe( pipe_slow );
21633 %}
21634
21635 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21636 predicate(Matcher::vector_length(n) <= 32 &&
21637 n->as_ShiftV()->is_var_shift() &&
21638 VM_Version::supports_avx512bw());
21639 match(Set dst ( LShiftVB src shift));
21640 match(Set dst ( RShiftVB src shift));
21641 match(Set dst (URShiftVB src shift));
21642 effect(TEMP dst, TEMP vtmp);
21643 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21644 ins_encode %{
21645 assert(UseAVX > 2, "required");
21646
21647 int opcode = this->ideal_Opcode();
21648 int vlen_enc = vector_length_encoding(this);
21649 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21650 %}
21651 ins_pipe( pipe_slow );
21652 %}
21653
21654 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21655 predicate(Matcher::vector_length(n) == 64 &&
21656 n->as_ShiftV()->is_var_shift() &&
21657 VM_Version::supports_avx512bw());
21658 match(Set dst ( LShiftVB src shift));
21659 match(Set dst ( RShiftVB src shift));
21660 match(Set dst (URShiftVB src shift));
21661 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21662 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21663 ins_encode %{
21664 assert(UseAVX > 2, "required");
21665
21666 int opcode = this->ideal_Opcode();
21667 int vlen_enc = Assembler::AVX_256bit;
21668 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21669 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21670 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21671 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21672 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21673 %}
21674 ins_pipe( pipe_slow );
21675 %}
21676
21677 // Short variable shift
21678 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21679 predicate(Matcher::vector_length(n) <= 8 &&
21680 n->as_ShiftV()->is_var_shift() &&
21681 !VM_Version::supports_avx512bw());
21682 match(Set dst ( LShiftVS src shift));
21683 match(Set dst ( RShiftVS src shift));
21684 match(Set dst (URShiftVS src shift));
21685 effect(TEMP dst, TEMP vtmp);
21686 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21687 ins_encode %{
21688 assert(UseAVX >= 2, "required");
21689
21690 int opcode = this->ideal_Opcode();
21691 bool sign = (opcode != Op_URShiftVS);
21692 int vlen_enc = Assembler::AVX_256bit;
21693 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21694 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21695 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21696 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21697 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21698 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21699 %}
21700 ins_pipe( pipe_slow );
21701 %}
21702
21703 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21704 predicate(Matcher::vector_length(n) == 16 &&
21705 n->as_ShiftV()->is_var_shift() &&
21706 !VM_Version::supports_avx512bw());
21707 match(Set dst ( LShiftVS src shift));
21708 match(Set dst ( RShiftVS src shift));
21709 match(Set dst (URShiftVS src shift));
21710 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21711 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21712 ins_encode %{
21713 assert(UseAVX >= 2, "required");
21714
21715 int opcode = this->ideal_Opcode();
21716 bool sign = (opcode != Op_URShiftVS);
21717 int vlen_enc = Assembler::AVX_256bit;
21718 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21719 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21720 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21721 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21722 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21723
21724 // Shift upper half, with result in dst using vtmp1 as TEMP
21725 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21726 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21727 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21728 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21729 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21730 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21731
21732 // Merge lower and upper half result into dst
21733 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21734 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21735 %}
21736 ins_pipe( pipe_slow );
21737 %}
21738
21739 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21740 predicate(n->as_ShiftV()->is_var_shift() &&
21741 VM_Version::supports_avx512bw());
21742 match(Set dst ( LShiftVS src shift));
21743 match(Set dst ( RShiftVS src shift));
21744 match(Set dst (URShiftVS src shift));
21745 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21746 ins_encode %{
21747 assert(UseAVX > 2, "required");
21748
21749 int opcode = this->ideal_Opcode();
21750 int vlen_enc = vector_length_encoding(this);
21751 if (!VM_Version::supports_avx512vl()) {
21752 vlen_enc = Assembler::AVX_512bit;
21753 }
21754 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21755 %}
21756 ins_pipe( pipe_slow );
21757 %}
21758
21759 // Integer variable shift
21760 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21761 predicate(n->as_ShiftV()->is_var_shift());
21762 match(Set dst ( LShiftVI src shift));
21763 match(Set dst ( RShiftVI src shift));
21764 match(Set dst (URShiftVI src shift));
21765 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21766 ins_encode %{
21767 assert(UseAVX >= 2, "required");
21768
21769 int opcode = this->ideal_Opcode();
21770 int vlen_enc = vector_length_encoding(this);
21771 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21772 %}
21773 ins_pipe( pipe_slow );
21774 %}
21775
21776 // Long variable shift
21777 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21778 predicate(n->as_ShiftV()->is_var_shift());
21779 match(Set dst ( LShiftVL src shift));
21780 match(Set dst (URShiftVL src shift));
21781 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21782 ins_encode %{
21783 assert(UseAVX >= 2, "required");
21784
21785 int opcode = this->ideal_Opcode();
21786 int vlen_enc = vector_length_encoding(this);
21787 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21788 %}
21789 ins_pipe( pipe_slow );
21790 %}
21791
21792 // Long variable arithmetic right shift
21793 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21794 predicate(Matcher::vector_length(n) <= 4 &&
21795 n->as_ShiftV()->is_var_shift() &&
21796 UseAVX == 2);
21797 match(Set dst (RShiftVL src shift));
21798 effect(TEMP dst, TEMP vtmp);
21799 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21800 ins_encode %{
21801 int opcode = this->ideal_Opcode();
21802 int vlen_enc = vector_length_encoding(this);
21803 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21804 $vtmp$$XMMRegister);
21805 %}
21806 ins_pipe( pipe_slow );
21807 %}
21808
21809 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21810 predicate(n->as_ShiftV()->is_var_shift() &&
21811 UseAVX > 2);
21812 match(Set dst (RShiftVL src shift));
21813 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21814 ins_encode %{
21815 int opcode = this->ideal_Opcode();
21816 int vlen_enc = vector_length_encoding(this);
21817 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21818 %}
21819 ins_pipe( pipe_slow );
21820 %}
21821
21822 // --------------------------------- AND --------------------------------------
21823
21824 instruct vand(vec dst, vec src) %{
21825 predicate(UseAVX == 0);
21826 match(Set dst (AndV dst src));
21827 format %{ "pand $dst,$src\t! and vectors" %}
21828 ins_encode %{
21829 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21830 %}
21831 ins_pipe( pipe_slow );
21832 %}
21833
21834 instruct vand_reg(vec dst, vec src1, vec src2) %{
21835 predicate(UseAVX > 0);
21836 match(Set dst (AndV src1 src2));
21837 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21838 ins_encode %{
21839 int vlen_enc = vector_length_encoding(this);
21840 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21841 %}
21842 ins_pipe( pipe_slow );
21843 %}
21844
21845 instruct vand_mem(vec dst, vec src, memory mem) %{
21846 predicate((UseAVX > 0) &&
21847 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21848 match(Set dst (AndV src (LoadVector mem)));
21849 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21850 ins_encode %{
21851 int vlen_enc = vector_length_encoding(this);
21852 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21853 %}
21854 ins_pipe( pipe_slow );
21855 %}
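
// A sketch (hypothetical helper) of a loop that takes the memory form above,
// folding the vector load of the second operand straight into the vpand:
//
//   static void andAll(long[] a, long[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] &= b[i];   // AndV(src, LoadVector(mem)) -> vpand xmm, xmm, mem
//     }
//   }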
21856
21857 // --------------------------------- OR ---------------------------------------
21858
21859 instruct vor(vec dst, vec src) %{
21860 predicate(UseAVX == 0);
21861 match(Set dst (OrV dst src));
21862 format %{ "por $dst,$src\t! or vectors" %}
21863 ins_encode %{
21864 __ por($dst$$XMMRegister, $src$$XMMRegister);
21865 %}
21866 ins_pipe( pipe_slow );
21867 %}
21868
21869 instruct vor_reg(vec dst, vec src1, vec src2) %{
21870 predicate(UseAVX > 0);
21871 match(Set dst (OrV src1 src2));
21872 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21873 ins_encode %{
21874 int vlen_enc = vector_length_encoding(this);
21875 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21876 %}
21877 ins_pipe( pipe_slow );
21878 %}
21879
21880 instruct vor_mem(vec dst, vec src, memory mem) %{
21881 predicate((UseAVX > 0) &&
21882 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21883 match(Set dst (OrV src (LoadVector mem)));
21884 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21885 ins_encode %{
21886 int vlen_enc = vector_length_encoding(this);
21887 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21888 %}
21889 ins_pipe( pipe_slow );
21890 %}
21891
21892 // --------------------------------- XOR --------------------------------------
21893
21894 instruct vxor(vec dst, vec src) %{
21895 predicate(UseAVX == 0);
21896 match(Set dst (XorV dst src));
21897 format %{ "pxor $dst,$src\t! xor vectors" %}
21898 ins_encode %{
21899 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21900 %}
21901 ins_pipe( pipe_slow );
21902 %}
21903
21904 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21905 predicate(UseAVX > 0);
21906 match(Set dst (XorV src1 src2));
21907 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21908 ins_encode %{
21909 int vlen_enc = vector_length_encoding(this);
21910 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21911 %}
21912 ins_pipe( pipe_slow );
21913 %}
21914
21915 instruct vxor_mem(vec dst, vec src, memory mem) %{
21916 predicate((UseAVX > 0) &&
21917 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21918 match(Set dst (XorV src (LoadVector mem)));
21919 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21920 ins_encode %{
21921 int vlen_enc = vector_length_encoding(this);
21922 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21923 %}
21924 ins_pipe( pipe_slow );
21925 %}
21926
21927 // --------------------------------- VectorCast --------------------------------------
21928
21929 instruct vcastBtoX(vec dst, vec src) %{
21930 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21931 match(Set dst (VectorCastB2X src));
21932 format %{ "vector_cast_b2x $dst,$src\t!" %}
21933 ins_encode %{
21934 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21935 int vlen_enc = vector_length_encoding(this);
21936 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21937 %}
21938 ins_pipe( pipe_slow );
21939 %}
21940
21941 instruct vcastBtoD(legVec dst, legVec src) %{
21942 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21943 match(Set dst (VectorCastB2X src));
21944 format %{ "vector_cast_b2x $dst,$src\t!" %}
21945 ins_encode %{
21946 int vlen_enc = vector_length_encoding(this);
21947 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21948 %}
21949 ins_pipe( pipe_slow );
21950 %}
21951
21952 instruct castStoX(vec dst, vec src) %{
21953 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21954 Matcher::vector_length(n->in(1)) <= 8 && // src
21955 Matcher::vector_element_basic_type(n) == T_BYTE);
21956 match(Set dst (VectorCastS2X src));
21957 format %{ "vector_cast_s2x $dst,$src" %}
21958 ins_encode %{
21959 assert(UseAVX > 0, "required");
21960
21961 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21962 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21963 %}
21964 ins_pipe( pipe_slow );
21965 %}
21966
21967 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21968 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21969 Matcher::vector_length(n->in(1)) == 16 && // src
21970 Matcher::vector_element_basic_type(n) == T_BYTE);
21971 effect(TEMP dst, TEMP vtmp);
21972 match(Set dst (VectorCastS2X src));
21973 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21974 ins_encode %{
21975 assert(UseAVX > 0, "required");
21976
21977 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
21978 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21979 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
21980 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21981 %}
21982 ins_pipe( pipe_slow );
21983 %}
21984
21985 instruct vcastStoX_evex(vec dst, vec src) %{
21986 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
21987 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21988 match(Set dst (VectorCastS2X src));
21989 format %{ "vector_cast_s2x $dst,$src\t!" %}
21990 ins_encode %{
21991 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21992 int src_vlen_enc = vector_length_encoding(this, $src);
21993 int vlen_enc = vector_length_encoding(this);
21994 switch (to_elem_bt) {
21995 case T_BYTE:
21996 if (!VM_Version::supports_avx512vl()) {
21997 vlen_enc = Assembler::AVX_512bit;
21998 }
21999 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22000 break;
22001 case T_INT:
22002 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22003 break;
22004 case T_FLOAT:
22005 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22006 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22007 break;
22008 case T_LONG:
22009 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22010 break;
22011 case T_DOUBLE: {
22012 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22013 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22014 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22015 break;
22016 }
22017 default:
22018 ShouldNotReachHere();
22019 }
22020 %}
22021 ins_pipe( pipe_slow );
22022 %}
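
// A sketch (hypothetical helper) of a widening conversion that takes the
// T_INT arm of the switch above:
//
//   static void widen(short[] s, int[] d) {
//     for (int i = 0; i < d.length; i++) {
//       d[i] = s[i];   // VectorCastS2X to T_INT -> vpmovsxwd
//     }
//   }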
22023
22024 instruct castItoX(vec dst, vec src) %{
22025 predicate(UseAVX <= 2 &&
22026 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22027 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22028 match(Set dst (VectorCastI2X src));
22029 format %{ "vector_cast_i2x $dst,$src" %}
22030 ins_encode %{
22031 assert(UseAVX > 0, "required");
22032
22033 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22034 int vlen_enc = vector_length_encoding(this, $src);
22035
22036 if (to_elem_bt == T_BYTE) {
22037 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22038 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22039 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22040 } else {
22041 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22042 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22043 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22044 }
22045 %}
22046 ins_pipe( pipe_slow );
22047 %}
22048
22049 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22050 predicate(UseAVX <= 2 &&
22051 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22052 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22053 match(Set dst (VectorCastI2X src));
22054 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22055 effect(TEMP dst, TEMP vtmp);
22056 ins_encode %{
22057 assert(UseAVX > 0, "required");
22058
22059 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22060 int vlen_enc = vector_length_encoding(this, $src);
22061
22062 if (to_elem_bt == T_BYTE) {
22063 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22064 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22065 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22066 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22067 } else {
22068 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22069 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22070 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22071 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22072 }
22073 %}
22074 ins_pipe( pipe_slow );
22075 %}
22076
22077 instruct vcastItoX_evex(vec dst, vec src) %{
22078 predicate(UseAVX > 2 ||
22079 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22080 match(Set dst (VectorCastI2X src));
22081 format %{ "vector_cast_i2x $dst,$src\t!" %}
22082 ins_encode %{
22083 assert(UseAVX > 0, "required");
22084
22085 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22086 int src_vlen_enc = vector_length_encoding(this, $src);
22087 int dst_vlen_enc = vector_length_encoding(this);
22088 switch (dst_elem_bt) {
22089 case T_BYTE:
22090 if (!VM_Version::supports_avx512vl()) {
22091 src_vlen_enc = Assembler::AVX_512bit;
22092 }
22093 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22094 break;
22095 case T_SHORT:
22096 if (!VM_Version::supports_avx512vl()) {
22097 src_vlen_enc = Assembler::AVX_512bit;
22098 }
22099 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22100 break;
22101 case T_FLOAT:
22102 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22103 break;
22104 case T_LONG:
22105 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22106 break;
22107 case T_DOUBLE:
22108 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22109 break;
22110 default:
22111 ShouldNotReachHere();
22112 }
22113 %}
22114 ins_pipe( pipe_slow );
22115 %}
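
// A sketch (hypothetical helper) of an int-to-float conversion taking the
// T_FLOAT case above:
//
//   static void toFloat(int[] a, float[] f) {
//     for (int i = 0; i < f.length; i++) {
//       f[i] = (float) a[i];   // VectorCastI2X to T_FLOAT -> vcvtdq2ps
//     }
//   }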
22116
22117 instruct vcastLtoBS(vec dst, vec src) %{
22118 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22119 UseAVX <= 2);
22120 match(Set dst (VectorCastL2X src));
22121 format %{ "vector_cast_l2x $dst,$src" %}
22122 ins_encode %{
22123 assert(UseAVX > 0, "required");
22124
22125 int vlen = Matcher::vector_length_in_bytes(this, $src);
22126 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22127 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22128 : ExternalAddress(vector_int_to_short_mask());
22129 if (vlen <= 16) {
22130 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22131 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22132 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22133 } else {
22134 assert(vlen <= 32, "required");
22135 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22136 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22137 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22138 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22139 }
22140 if (to_elem_bt == T_BYTE) {
22141 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22142 }
22143 %}
22144 ins_pipe( pipe_slow );
22145 %}
22146
22147 instruct vcastLtoX_evex(vec dst, vec src) %{
22148 predicate(UseAVX > 2 ||
22149 (Matcher::vector_element_basic_type(n) == T_INT ||
22150 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22151 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22152 match(Set dst (VectorCastL2X src));
22153 format %{ "vector_cast_l2x $dst,$src\t!" %}
22154 ins_encode %{
22155 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22156 int vlen = Matcher::vector_length_in_bytes(this, $src);
22157 int vlen_enc = vector_length_encoding(this, $src);
22158 switch (to_elem_bt) {
22159 case T_BYTE:
22160 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22161 vlen_enc = Assembler::AVX_512bit;
22162 }
22163 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22164 break;
22165 case T_SHORT:
22166 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22167 vlen_enc = Assembler::AVX_512bit;
22168 }
22169 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22170 break;
22171 case T_INT:
22172 if (vlen == 8) {
22173 if ($dst$$XMMRegister != $src$$XMMRegister) {
22174 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22175 }
22176 } else if (vlen == 16) {
22177 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22178 } else if (vlen == 32) {
22179 if (UseAVX > 2) {
22180 if (!VM_Version::supports_avx512vl()) {
22181 vlen_enc = Assembler::AVX_512bit;
22182 }
22183 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22184 } else {
22185 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22186 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22187 }
22188 } else { // vlen == 64
22189 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22190 }
22191 break;
22192 case T_FLOAT:
22193 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22194 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22195 break;
22196 case T_DOUBLE:
22197 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22198 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22199 break;
22200
22201 default: assert(false, "%s", type2name(to_elem_bt));
22202 }
22203 %}
22204 ins_pipe( pipe_slow );
22205 %}
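
// A sketch (hypothetical helper) of a narrowing conversion handled by the
// T_INT case above:
//
//   static void narrow(long[] src, int[] dst) {
//     for (int i = 0; i < dst.length; i++) {
//       dst[i] = (int) src[i];   // VectorCastL2X to T_INT
//     }
//   }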
22206
22207 instruct vcastFtoD_reg(vec dst, vec src) %{
22208 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22209 match(Set dst (VectorCastF2X src));
22210 format %{ "vector_cast_f2d $dst,$src\t!" %}
22211 ins_encode %{
22212 int vlen_enc = vector_length_encoding(this);
22213 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22214 %}
22215 ins_pipe( pipe_slow );
22216 %}
22217
22219 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22220 predicate(!VM_Version::supports_avx10_2() &&
22221 !VM_Version::supports_avx512vl() &&
22222 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22223 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22224 is_integral_type(Matcher::vector_element_basic_type(n)));
22225 match(Set dst (VectorCastF2X src));
22226 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22227 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22228 ins_encode %{
22229 int vlen_enc = vector_length_encoding(this, $src);
22230 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22231 // JDK-8292878 removed the need for an explicit scratch register when loading
22232 // addresses wider than 32 bits for register-indirect addressing: stub constants
22233 // live in the code cache, and ReservedCodeCacheSize is currently capped at 2G.
22234 // Targets are free to raise that limit, but a code cache larger than 2G is
22235 // unrealistic in practice. The upside of the cap is that we save a temporary
22236 // register allocation, which in the limiting case can prevent spilling in
22237 // blocks with high register pressure.
22238 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22239 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22240 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22241 %}
22242 ins_pipe( pipe_slow );
22243 %}
22244
22245 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22246 predicate(!VM_Version::supports_avx10_2() &&
22247 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22248 is_integral_type(Matcher::vector_element_basic_type(n)));
22249 match(Set dst (VectorCastF2X src));
22250 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22251 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22252 ins_encode %{
22253 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22254 if (to_elem_bt == T_LONG) {
22255 int vlen_enc = vector_length_encoding(this);
22256 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22257 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22258 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22259 } else {
22260 int vlen_enc = vector_length_encoding(this, $src);
22261 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22262 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22263 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22264 }
22265 %}
22266 ins_pipe( pipe_slow );
22267 %}
22268
22269 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22270 predicate(VM_Version::supports_avx10_2() &&
22271 is_integral_type(Matcher::vector_element_basic_type(n)));
22272 match(Set dst (VectorCastF2X src));
22273 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22274 ins_encode %{
22275 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22276 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22277 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22278 %}
22279 ins_pipe( pipe_slow );
22280 %}
22281
22282 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22283 predicate(VM_Version::supports_avx10_2() &&
22284 is_integral_type(Matcher::vector_element_basic_type(n)));
22285 match(Set dst (VectorCastF2X (LoadVector src)));
22286 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22287 ins_encode %{
22288 int vlen = Matcher::vector_length(this);
22289 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22290 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22291 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22292 %}
22293 ins_pipe( pipe_slow );
22294 %}
22295
22296 instruct vcastDtoF_reg(vec dst, vec src) %{
22297 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22298 match(Set dst (VectorCastD2X src));
22299 format %{ "vector_cast_d2x $dst,$src\t!" %}
22300 ins_encode %{
22301 int vlen_enc = vector_length_encoding(this, $src);
22302 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22303 %}
22304 ins_pipe( pipe_slow );
22305 %}
22306
22307 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22308 predicate(!VM_Version::supports_avx10_2() &&
22309 !VM_Version::supports_avx512vl() &&
22310 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22311 is_integral_type(Matcher::vector_element_basic_type(n)));
22312 match(Set dst (VectorCastD2X src));
22313 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22314 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22315 ins_encode %{
22316 int vlen_enc = vector_length_encoding(this, $src);
22317 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22318 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22319 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22320 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22321 %}
22322 ins_pipe( pipe_slow );
22323 %}
22324
22325 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22326 predicate(!VM_Version::supports_avx10_2() &&
22327 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22328 is_integral_type(Matcher::vector_element_basic_type(n)));
22329 match(Set dst (VectorCastD2X src));
22330 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22331 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22332 ins_encode %{
22333 int vlen_enc = vector_length_encoding(this, $src);
22334 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22335 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22336 ExternalAddress(vector_float_signflip());
22337 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22338 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22339 %}
22340 ins_pipe( pipe_slow );
22341 %}
22342
22343 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22344 predicate(VM_Version::supports_avx10_2() &&
22345 is_integral_type(Matcher::vector_element_basic_type(n)));
22346 match(Set dst (VectorCastD2X src));
22347 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22348 ins_encode %{
22349 int vlen_enc = vector_length_encoding(this, $src);
22350 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22351 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22352 %}
22353 ins_pipe( pipe_slow );
22354 %}
22355
22356 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22357 predicate(VM_Version::supports_avx10_2() &&
22358 is_integral_type(Matcher::vector_element_basic_type(n)));
22359 match(Set dst (VectorCastD2X (LoadVector src)));
22360 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22361 ins_encode %{
22362 int vlen = Matcher::vector_length(this);
22363 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22364 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22365 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22366 %}
22367 ins_pipe( pipe_slow );
22368 %}
22369
22370 instruct vucast(vec dst, vec src) %{
22371 match(Set dst (VectorUCastB2X src));
22372 match(Set dst (VectorUCastS2X src));
22373 match(Set dst (VectorUCastI2X src));
22374 format %{ "vector_ucast $dst,$src\t!" %}
22375 ins_encode %{
22376 assert(UseAVX > 0, "required");
22377
22378 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22379 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22380 int vlen_enc = vector_length_encoding(this);
22381 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22382 %}
22383 ins_pipe( pipe_slow );
22384 %}
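
// Unsigned (zero-extending) casts come from Vector API conversions rather
// than Java's signed casts, e.g. (names from jdk.incubator.vector):
//
//   var iv = (IntVector) bv.convert(VectorOperators.ZERO_EXTEND_B2I, 0);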
22385
22386 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22387 predicate(!VM_Version::supports_avx512vl() &&
22388 Matcher::vector_length_in_bytes(n) < 64 &&
22389 Matcher::vector_element_basic_type(n) == T_INT);
22390 match(Set dst (RoundVF src));
22391 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22392 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22393 ins_encode %{
22394 int vlen_enc = vector_length_encoding(this);
22395 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22396 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22397 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22398 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22399 %}
22400 ins_pipe( pipe_slow );
22401 %}
22402
22403 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22404 predicate((VM_Version::supports_avx512vl() ||
22405 Matcher::vector_length_in_bytes(n) == 64) &&
22406 Matcher::vector_element_basic_type(n) == T_INT);
22407 match(Set dst (RoundVF src));
22408 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22409 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22410 ins_encode %{
22411 int vlen_enc = vector_length_encoding(this);
22412 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22413 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22414 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22415 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22416 %}
22417 ins_pipe( pipe_slow );
22418 %}
22419
22420 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22421 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22422 match(Set dst (RoundVD src));
22423 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22424 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22425 ins_encode %{
22426 int vlen_enc = vector_length_encoding(this);
22427 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22428 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22429 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22430 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22431 %}
22432 ins_pipe( pipe_slow );
22433 %}
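
// These RoundVF/RoundVD rules vectorize Math.round, whose round-half-up
// semantics differ from the default round-to-nearest-even conversion; that is
// why a custom MXCSR value is materialized above. A sketch (hypothetical
// helper) of source they cover:
//
//   static void roundAll(float[] f, int[] r) {
//     for (int i = 0; i < f.length; i++) {
//       r[i] = Math.round(f[i]);   // RoundVF
//     }
//   }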
22434
22435 // --------------------------------- VectorMaskCmp --------------------------------------
22436
22437 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22438 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22439 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22440 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22441 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22442 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22443 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22444 ins_encode %{
22445 int vlen_enc = vector_length_encoding(this, $src1);
22446 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22447 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22448 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22449 } else {
22450 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22451 }
22452 %}
22453 ins_pipe( pipe_slow );
22454 %}
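
// A sketch (Vector API, jdk.incubator.vector) of source that yields a
// floating-point VectorMaskCmp; the rule above materializes the result as a
// per-lane all-ones/all-zeros mask vector:
//
//   var m = fv1.lt(fv2);      // VectorMaskCmp with cond = lt
//   fv3.blend(fv4, m);        // the mask feeds a blend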
22455
22456 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22457 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22458 n->bottom_type()->isa_vectmask() == nullptr &&
22459 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22460 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22461 effect(TEMP ktmp);
22462 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22463 ins_encode %{
22464 int vlen_enc = Assembler::AVX_512bit;
22465 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22466 KRegister mask = k0; // The comparison itself is not being masked.
22467 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22468 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22469 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22470 } else {
22471 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22472 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22473 }
22474 %}
22475 ins_pipe( pipe_slow );
22476 %}
22477
22478 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22479 predicate(n->bottom_type()->isa_vectmask() &&
22480 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22481 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22482 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22483 ins_encode %{
22484 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22485 int vlen_enc = vector_length_encoding(this, $src1);
22486 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22487 KRegister mask = k0; // The comparison itself is not being masked.
22488 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22489 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22490 } else {
22491 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22492 }
22493 %}
22494 ins_pipe( pipe_slow );
22495 %}
22496
22497 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22498 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22499 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22500 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22501 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22502 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22503 (n->in(2)->get_int() == BoolTest::eq ||
22504 n->in(2)->get_int() == BoolTest::lt ||
22505 n->in(2)->get_int() == BoolTest::gt)); // cond
22506 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22507 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22508 ins_encode %{
22509 int vlen_enc = vector_length_encoding(this, $src1);
22510 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22511 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22512 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22513 %}
22514 ins_pipe( pipe_slow );
22515 %}
22516
22517 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22518 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22519 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22520 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22521 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22522 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22523 (n->in(2)->get_int() == BoolTest::ne ||
22524 n->in(2)->get_int() == BoolTest::le ||
22525 n->in(2)->get_int() == BoolTest::ge)); // cond
22526 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22527 effect(TEMP dst, TEMP xtmp);
22528 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22529 ins_encode %{
22530 int vlen_enc = vector_length_encoding(this, $src1);
22531 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22532 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22533 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22534 %}
22535 ins_pipe( pipe_slow );
22536 %}
22537
22538 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22539 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22540 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22541 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22542 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22543 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22544 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22545 effect(TEMP dst, TEMP xtmp);
22546 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22547 ins_encode %{
22548 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22549 int vlen_enc = vector_length_encoding(this, $src1);
22550 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22551 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22552
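    // No unsigned packed compare exists below AVX-512, so flip the sign bit of
    // both operands and use a signed compare: x <u y iff (x ^ MIN) <s (y ^ MIN).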
22553 if (vlen_enc == Assembler::AVX_128bit) {
22554 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22555 } else {
22556 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22557 }
22558 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22559 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22560 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22561 %}
22562 ins_pipe( pipe_slow );
22563 %}
22564
22565 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22566 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22567 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22568 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22569 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22570 effect(TEMP ktmp);
22571 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22572 ins_encode %{
22573 assert(UseAVX > 2, "required");
22574
22575 int vlen_enc = vector_length_encoding(this, $src1);
22576 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22577 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22578 KRegister mask = k0; // The comparison itself is not being masked.
22579 bool merge = false;
22580 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22581
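    // The EVEX compare produces a mask in $ktmp; expand it back into a vector of
    // all-ones/all-zeroes lanes with a masked load of the all-bits-set constant.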
22582 switch (src1_elem_bt) {
22583 case T_INT: {
22584 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22585 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22586 break;
22587 }
22588 case T_LONG: {
22589 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22590 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22591 break;
22592 }
22593 default: assert(false, "%s", type2name(src1_elem_bt));
22594 }
22595 %}
22596 ins_pipe( pipe_slow );
22597 %}
22598
22599
22600 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22601 predicate(n->bottom_type()->isa_vectmask() &&
22602 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22603 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22605 ins_encode %{
22606 assert(UseAVX > 2, "required");
22607 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22608
22609 int vlen_enc = vector_length_encoding(this, $src1);
22610 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22611 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22612 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22613
    // Compare the lanes of $src1 and $src2 and set the corresponding bits of the $dst mask register.
22615 switch (src1_elem_bt) {
22616 case T_BYTE: {
22617 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22618 break;
22619 }
22620 case T_SHORT: {
22621 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22622 break;
22623 }
22624 case T_INT: {
22625 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22626 break;
22627 }
22628 case T_LONG: {
22629 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22630 break;
22631 }
22632 default: assert(false, "%s", type2name(src1_elem_bt));
22633 }
22634 %}
22635 ins_pipe( pipe_slow );
22636 %}
22637
22638 // Extract
22639
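// For sources up to 16 bytes the element is extracted directly; wider vectors
// first isolate the 128-bit lane holding the element (get_lane) and then
// extract within that lane (get_elem).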
22640 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22641 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22642 match(Set dst (ExtractI src idx));
22643 match(Set dst (ExtractS src idx));
22644 match(Set dst (ExtractB src idx));
22645 format %{ "extractI $dst,$src,$idx\t!" %}
22646 ins_encode %{
22647 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22648
22649 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22650 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22651 %}
22652 ins_pipe( pipe_slow );
22653 %}
22654
instruct vextractI(rRegI dst, legVec src, immU8 idx, legVec vtmp) %{
22656 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22657 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22658 match(Set dst (ExtractI src idx));
22659 match(Set dst (ExtractS src idx));
22660 match(Set dst (ExtractB src idx));
22661 effect(TEMP vtmp);
22662 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22663 ins_encode %{
22664 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22665
22666 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22667 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22668 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22669 %}
22670 ins_pipe( pipe_slow );
22671 %}
22672
22673 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22674 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22675 match(Set dst (ExtractL src idx));
22676 format %{ "extractL $dst,$src,$idx\t!" %}
22677 ins_encode %{
22678 assert(UseSSE >= 4, "required");
22679 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22680
22681 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22682 %}
22683 ins_pipe( pipe_slow );
22684 %}
22685
22686 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22687 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22688 Matcher::vector_length(n->in(1)) == 8); // src
22689 match(Set dst (ExtractL src idx));
22690 effect(TEMP vtmp);
22691 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22692 ins_encode %{
22693 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22694
22695 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22696 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22697 %}
22698 ins_pipe( pipe_slow );
22699 %}
22700
22701 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22702 predicate(Matcher::vector_length(n->in(1)) <= 4);
22703 match(Set dst (ExtractF src idx));
22704 effect(TEMP dst, TEMP vtmp);
22705 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22706 ins_encode %{
22707 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22708
22709 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22710 %}
22711 ins_pipe( pipe_slow );
22712 %}
22713
22714 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 8 ||  // src
            Matcher::vector_length(n->in(1)) == 16);  // src
22717 match(Set dst (ExtractF src idx));
22718 effect(TEMP vtmp);
22719 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22720 ins_encode %{
22721 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22722
22723 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22724 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22725 %}
22726 ins_pipe( pipe_slow );
22727 %}
22728
22729 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22730 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22731 match(Set dst (ExtractD src idx));
22732 format %{ "extractD $dst,$src,$idx\t!" %}
22733 ins_encode %{
22734 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22735
22736 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22737 %}
22738 ins_pipe( pipe_slow );
22739 %}
22740
22741 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22742 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22743 Matcher::vector_length(n->in(1)) == 8); // src
22744 match(Set dst (ExtractD src idx));
22745 effect(TEMP vtmp);
22746 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22747 ins_encode %{
22748 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22749
22750 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22751 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22752 %}
22753 ins_pipe( pipe_slow );
22754 %}
22755
22756 // --------------------------------- Vector Blend --------------------------------------
22757
22758 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22759 predicate(UseAVX == 0);
22760 match(Set dst (VectorBlend (Binary dst src) mask));
22761 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22762 effect(TEMP tmp);
22763 ins_encode %{
22764 assert(UseSSE >= 4, "required");
22765
22766 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22767 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22768 }
22769 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22770 %}
22771 ins_pipe( pipe_slow );
22772 %}
22773
22774 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22775 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22776 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22777 Matcher::vector_length_in_bytes(n) <= 32 &&
22778 is_integral_type(Matcher::vector_element_basic_type(n)));
22779 match(Set dst (VectorBlend (Binary src1 src2) mask));
22780 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22781 ins_encode %{
22782 int vlen_enc = vector_length_encoding(this);
22783 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22784 %}
22785 ins_pipe( pipe_slow );
22786 %}
22787
22788 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22789 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22790 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22791 Matcher::vector_length_in_bytes(n) <= 32 &&
22792 !is_integral_type(Matcher::vector_element_basic_type(n)));
22793 match(Set dst (VectorBlend (Binary src1 src2) mask));
22794 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22795 ins_encode %{
22796 int vlen_enc = vector_length_encoding(this);
22797 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22798 %}
22799 ins_pipe( pipe_slow );
22800 %}
22801
22802 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22803 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22804 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22805 Matcher::vector_length_in_bytes(n) <= 32);
22806 match(Set dst (VectorBlend (Binary src1 src2) mask));
22807 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22808 effect(TEMP vtmp, TEMP dst);
22809 ins_encode %{
22810 int vlen_enc = vector_length_encoding(this);
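    // Emulate the blend with boolean ops: $dst = ($mask & $src2) | (~$mask & $src1).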
22811 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22812 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22813 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22814 %}
22815 ins_pipe( pipe_slow );
22816 %}
22817
22818 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22819 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22820 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22821 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22823 effect(TEMP ktmp);
22824 ins_encode %{
22825 int vlen_enc = Assembler::AVX_512bit;
22826 BasicType elem_bt = Matcher::vector_element_basic_type(this);
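    // Materialize the vector mask as a k-register by comparing it against the
    // all-bits-set constant, then blend $src1/$src2 under that mask.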
22827 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22828 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22829 %}
22830 ins_pipe( pipe_slow );
22831 %}
22832
22833
22834 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22835 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22836 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22837 VM_Version::supports_avx512bw()));
22838 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22840 ins_encode %{
22841 int vlen_enc = vector_length_encoding(this);
22842 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22843 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22844 %}
22845 ins_pipe( pipe_slow );
22846 %}
22847
22848 // --------------------------------- ABS --------------------------------------
22849 // a = |a|
22850 instruct vabsB_reg(vec dst, vec src) %{
22851 match(Set dst (AbsVB src));
22852 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22853 ins_encode %{
22854 uint vlen = Matcher::vector_length(this);
22855 if (vlen <= 16) {
22856 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22857 } else {
22858 int vlen_enc = vector_length_encoding(this);
22859 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22860 }
22861 %}
22862 ins_pipe( pipe_slow );
22863 %}
22864
22865 instruct vabsS_reg(vec dst, vec src) %{
22866 match(Set dst (AbsVS src));
22867 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22868 ins_encode %{
22869 uint vlen = Matcher::vector_length(this);
22870 if (vlen <= 8) {
22871 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22872 } else {
22873 int vlen_enc = vector_length_encoding(this);
22874 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22875 }
22876 %}
22877 ins_pipe( pipe_slow );
22878 %}
22879
22880 instruct vabsI_reg(vec dst, vec src) %{
22881 match(Set dst (AbsVI src));
22882 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22883 ins_encode %{
22884 uint vlen = Matcher::vector_length(this);
22885 if (vlen <= 4) {
22886 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22887 } else {
22888 int vlen_enc = vector_length_encoding(this);
22889 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22890 }
22891 %}
22892 ins_pipe( pipe_slow );
22893 %}
22894
22895 instruct vabsL_reg(vec dst, vec src) %{
22896 match(Set dst (AbsVL src));
22897 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22898 ins_encode %{
22899 assert(UseAVX > 2, "required");
22900 int vlen_enc = vector_length_encoding(this);
22901 if (!VM_Version::supports_avx512vl()) {
22902 vlen_enc = Assembler::AVX_512bit;
22903 }
22904 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22905 %}
22906 ins_pipe( pipe_slow );
22907 %}
22908
22909 // --------------------------------- ABSNEG --------------------------------------
22910
22911 instruct vabsnegF(vec dst, vec src) %{
22912 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22913 match(Set dst (AbsVF src));
22914 match(Set dst (NegVF src));
22915 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22916 ins_cost(150);
22917 ins_encode %{
22918 int opcode = this->ideal_Opcode();
22919 int vlen = Matcher::vector_length(this);
22920 if (vlen == 2) {
22921 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22922 } else {
22923 assert(vlen == 8 || vlen == 16, "required");
22924 int vlen_enc = vector_length_encoding(this);
22925 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22926 }
22927 %}
22928 ins_pipe( pipe_slow );
22929 %}
22930
22931 instruct vabsneg4F(vec dst) %{
22932 predicate(Matcher::vector_length(n) == 4);
22933 match(Set dst (AbsVF dst));
22934 match(Set dst (NegVF dst));
22935 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22936 ins_cost(150);
22937 ins_encode %{
22938 int opcode = this->ideal_Opcode();
22939 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22940 %}
22941 ins_pipe( pipe_slow );
22942 %}
22943
22944 instruct vabsnegD(vec dst, vec src) %{
22945 match(Set dst (AbsVD src));
22946 match(Set dst (NegVD src));
22947 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22948 ins_encode %{
22949 int opcode = this->ideal_Opcode();
22950 uint vlen = Matcher::vector_length(this);
22951 if (vlen == 2) {
22952 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22953 } else {
22954 int vlen_enc = vector_length_encoding(this);
22955 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22956 }
22957 %}
22958 ins_pipe( pipe_slow );
22959 %}
22960
22961 //------------------------------------- VectorTest --------------------------------------------
22962
22963 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22964 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22965 match(Set cr (VectorTest src1 src2));
22966 effect(TEMP vtmp);
22967 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22968 ins_encode %{
22969 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22970 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22971 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22972 %}
22973 ins_pipe( pipe_slow );
22974 %}
22975
22976 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
22977 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22978 match(Set cr (VectorTest src1 src2));
22979 format %{ "vptest_ge16 $src1, $src2\n\t" %}
22980 ins_encode %{
22981 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22982 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22983 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
22984 %}
22985 ins_pipe( pipe_slow );
22986 %}
22987
22988 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
22989 predicate((Matcher::vector_length(n->in(1)) < 8 ||
22990 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22991 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22992 match(Set cr (VectorTest src1 src2));
22993 effect(TEMP tmp);
22994 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
22995 ins_encode %{
22996 uint masklen = Matcher::vector_length(this, $src1);
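    // All-true test: keep only the low masklen mask bits and compare them
    // against an all-ones pattern; the flags feed the overflow BoolTest above.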
22997 __ kmovwl($tmp$$Register, $src1$$KRegister);
22998 __ andl($tmp$$Register, (1 << masklen) - 1);
22999 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23000 %}
23001 ins_pipe( pipe_slow );
23002 %}
23003
23004 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23005 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23006 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23007 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23008 match(Set cr (VectorTest src1 src2));
23009 effect(TEMP tmp);
23010 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23011 ins_encode %{
23012 uint masklen = Matcher::vector_length(this, $src1);
23013 __ kmovwl($tmp$$Register, $src1$$KRegister);
23014 __ andl($tmp$$Register, (1 << masklen) - 1);
23015 %}
23016 ins_pipe( pipe_slow );
23017 %}
23018
23019 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23020 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23021 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23022 match(Set cr (VectorTest src1 src2));
23023 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23024 ins_encode %{
23025 uint masklen = Matcher::vector_length(this, $src1);
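    // kortest ORs the mask with itself and sets ZF when the result is all
    // zeroes and CF when all bits are set, covering both any- and all-true tests.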
23026 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23027 %}
23028 ins_pipe( pipe_slow );
23029 %}
23030
23031 //------------------------------------- LoadMask --------------------------------------------
23032
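// VectorLoadMask widens a 0/1 byte mask into lanes that are all-zeroes or
// all-ones (or, for true vector-mask types, into a k-register).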
23033 instruct loadMask(legVec dst, legVec src) %{
23034 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23035 match(Set dst (VectorLoadMask src));
23036 effect(TEMP dst);
23037 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23038 ins_encode %{
23039 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23040 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23041 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23042 %}
23043 ins_pipe( pipe_slow );
23044 %}
23045
23046 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23047 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23048 match(Set dst (VectorLoadMask src));
23049 effect(TEMP xtmp);
23050 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23051 ins_encode %{
23052 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23053 true, Assembler::AVX_512bit);
23054 %}
23055 ins_pipe( pipe_slow );
23056 %}
23057
23058 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23059 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23060 match(Set dst (VectorLoadMask src));
23061 effect(TEMP xtmp);
23062 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23063 ins_encode %{
23064 int vlen_enc = vector_length_encoding(in(1));
23065 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23066 false, vlen_enc);
23067 %}
23068 ins_pipe( pipe_slow );
23069 %}
23070
23071 //------------------------------------- StoreMask --------------------------------------------
23072
23073 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23074 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23075 match(Set dst (VectorStoreMask src size));
23076 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23077 ins_encode %{
23078 int vlen = Matcher::vector_length(this);
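    // Mask lanes are 0 or -1; pabsb maps each -1 byte to 1, yielding the
    // canonical 0/1 byte mask that VectorStoreMask produces.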
23079 if (vlen <= 16 && UseAVX <= 2) {
23080 assert(UseSSE >= 3, "required");
23081 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23082 } else {
23083 assert(UseAVX > 0, "required");
23084 int src_vlen_enc = vector_length_encoding(this, $src);
23085 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23086 }
23087 %}
23088 ins_pipe( pipe_slow );
23089 %}
23090
23091 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23092 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23093 match(Set dst (VectorStoreMask src size));
23094 effect(TEMP_DEF dst, TEMP xtmp);
23095 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23096 ins_encode %{
23097 int vlen_enc = Assembler::AVX_128bit;
23098 int vlen = Matcher::vector_length(this);
23099 if (vlen <= 8) {
23100 assert(UseSSE >= 3, "required");
23101 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23102 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23103 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23104 } else {
23105 assert(UseAVX > 0, "required");
23106 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23107 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23108 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23109 }
23110 %}
23111 ins_pipe( pipe_slow );
23112 %}
23113
23114 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23115 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23116 match(Set dst (VectorStoreMask src size));
23117 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23118 effect(TEMP_DEF dst, TEMP xtmp);
23119 ins_encode %{
23120 int vlen_enc = Assembler::AVX_128bit;
23121 int vlen = Matcher::vector_length(this);
23122 if (vlen <= 4) {
23123 assert(UseSSE >= 3, "required");
23124 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23125 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23126 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23127 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23128 } else {
23129 assert(UseAVX > 0, "required");
23130 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23131 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23132 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23133 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23134 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23135 }
23136 %}
23137 ins_pipe( pipe_slow );
23138 %}
23139
23140 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23141 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23142 match(Set dst (VectorStoreMask src size));
23143 effect(TEMP_DEF dst, TEMP xtmp);
23144 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23145 ins_encode %{
23146 assert(UseSSE >= 3, "required");
23147 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23148 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23149 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23150 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23151 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23152 %}
23153 ins_pipe( pipe_slow );
23154 %}
23155
23156 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23157 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23158 match(Set dst (VectorStoreMask src size));
23159 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23160 effect(TEMP_DEF dst, TEMP vtmp);
23161 ins_encode %{
23162 int vlen_enc = Assembler::AVX_128bit;
23163 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23164 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23165 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23166 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23167 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23168 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23169 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23170 %}
23171 ins_pipe( pipe_slow );
23172 %}
23173
23174 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23175 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23176 match(Set dst (VectorStoreMask src size));
23177 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23178 ins_encode %{
23179 int src_vlen_enc = vector_length_encoding(this, $src);
23180 int dst_vlen_enc = vector_length_encoding(this);
23181 if (!VM_Version::supports_avx512vl()) {
23182 src_vlen_enc = Assembler::AVX_512bit;
23183 }
23184 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23185 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23186 %}
23187 ins_pipe( pipe_slow );
23188 %}
23189
23190 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23191 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23192 match(Set dst (VectorStoreMask src size));
23193 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23194 ins_encode %{
23195 int src_vlen_enc = vector_length_encoding(this, $src);
23196 int dst_vlen_enc = vector_length_encoding(this);
23197 if (!VM_Version::supports_avx512vl()) {
23198 src_vlen_enc = Assembler::AVX_512bit;
23199 }
23200 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23201 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23202 %}
23203 ins_pipe( pipe_slow );
23204 %}
23205
23206 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23207 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23208 match(Set dst (VectorStoreMask mask size));
23209 effect(TEMP_DEF dst);
23210 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23211 ins_encode %{
23212 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23213 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23214 false, Assembler::AVX_512bit, noreg);
23215 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23216 %}
23217 ins_pipe( pipe_slow );
23218 %}
23219
23220 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23221 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23222 match(Set dst (VectorStoreMask mask size));
23223 effect(TEMP_DEF dst);
23224 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23225 ins_encode %{
23226 int dst_vlen_enc = vector_length_encoding(this);
23227 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23228 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23229 %}
23230 ins_pipe( pipe_slow );
23231 %}
23232
23233 instruct vmaskcast_evex(kReg dst) %{
23234 match(Set dst (VectorMaskCast dst));
23235 ins_cost(0);
23236 format %{ "vector_mask_cast $dst" %}
23237 ins_encode %{
23238 // empty
23239 %}
23240 ins_pipe(empty);
23241 %}
23242
23243 instruct vmaskcast(vec dst) %{
23244 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23245 match(Set dst (VectorMaskCast dst));
23246 ins_cost(0);
23247 format %{ "vector_mask_cast $dst" %}
23248 ins_encode %{
23249 // empty
23250 %}
23251 ins_pipe(empty);
23252 %}
23253
23254 instruct vmaskcast_avx(vec dst, vec src) %{
23255 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23256 match(Set dst (VectorMaskCast src));
23257 format %{ "vector_mask_cast $dst, $src" %}
23258 ins_encode %{
23259 int vlen = Matcher::vector_length(this);
23260 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23261 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23262 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23263 %}
23264 ins_pipe(pipe_slow);
23265 %}
23266
23267 //-------------------------------- Load Iota Indices ----------------------------------
23268
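// load_iota_indices materializes the constant index sequence 0, 1, 2, ... in
// the requested element type from the constant table.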
23269 instruct loadIotaIndices(vec dst, immI_0 src) %{
23270 match(Set dst (VectorLoadConst src));
23271 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23272 ins_encode %{
23273 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23274 BasicType bt = Matcher::vector_element_basic_type(this);
23275 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23276 %}
23277 ins_pipe( pipe_slow );
23278 %}
23279
23280 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23281 match(Set dst (PopulateIndex src1 src2));
23282 effect(TEMP dst, TEMP vtmp);
23283 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23284 ins_encode %{
23285 assert($src2$$constant == 1, "required");
23286 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23287 int vlen_enc = vector_length_encoding(this);
23288 BasicType elem_bt = Matcher::vector_element_basic_type(this);
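    // $dst[i] = $src1 + i (the $src2 stride is constrained to 1): broadcast the
    // scalar start value, then add the iota constant.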
23289 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23290 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23291 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23292 %}
23293 ins_pipe( pipe_slow );
23294 %}
23295
23296 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23297 match(Set dst (PopulateIndex src1 src2));
23298 effect(TEMP dst, TEMP vtmp);
23299 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23300 ins_encode %{
23301 assert($src2$$constant == 1, "required");
23302 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23303 int vlen_enc = vector_length_encoding(this);
23304 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23305 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23306 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23307 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23308 %}
23309 ins_pipe( pipe_slow );
23310 %}
23311
23312 //-------------------------------- Rearrange ----------------------------------
23313
23314 // LoadShuffle/Rearrange for Byte
23315 instruct rearrangeB(vec dst, vec shuffle) %{
23316 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23317 Matcher::vector_length(n) < 32);
23318 match(Set dst (VectorRearrange dst shuffle));
23319 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23320 ins_encode %{
23321 assert(UseSSE >= 4, "required");
23322 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23323 %}
23324 ins_pipe( pipe_slow );
23325 %}
23326
23327 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23328 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23329 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23330 match(Set dst (VectorRearrange src shuffle));
23331 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23332 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23333 ins_encode %{
23334 assert(UseAVX >= 2, "required");
23335 // Swap src into vtmp1
23336 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23337 // Shuffle swapped src to get entries from other 128 bit lane
23338 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23339 // Shuffle original src to get entries from self 128 bit lane
23340 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23341 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23342 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23343 // Perform the blend
23344 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23345 %}
23346 ins_pipe( pipe_slow );
23347 %}
23348
23349
23350 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23351 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23352 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23353 match(Set dst (VectorRearrange src shuffle));
23354 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23356 ins_encode %{
23357 int vlen_enc = vector_length_encoding(this);
23358 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23359 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23360 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23361 %}
23362 ins_pipe( pipe_slow );
23363 %}
23364
23365 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23366 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23367 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23368 match(Set dst (VectorRearrange src shuffle));
23369 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23370 ins_encode %{
23371 int vlen_enc = vector_length_encoding(this);
23372 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23373 %}
23374 ins_pipe( pipe_slow );
23375 %}
23376
23377 // LoadShuffle/Rearrange for Short
23378
23379 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23380 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23381 !VM_Version::supports_avx512bw());
23382 match(Set dst (VectorLoadShuffle src));
23383 effect(TEMP dst, TEMP vtmp);
23384 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23385 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
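    // E.g. a short shuffle index of 3 becomes the byte pair {6, 7}: doubling
    // gives the low-byte index, adding one selects the high byte.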
23388 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23389 if (UseAVX == 0) {
23390 assert(vlen_in_bytes <= 16, "required");
23391 // Multiply each shuffle by two to get byte index
23392 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23393 __ psllw($vtmp$$XMMRegister, 1);
23394
23395 // Duplicate to create 2 copies of byte index
23396 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23397 __ psllw($dst$$XMMRegister, 8);
23398 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23399
23400 // Add one to get alternate byte index
23401 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23402 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23403 } else {
23404 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23405 int vlen_enc = vector_length_encoding(this);
23406 // Multiply each shuffle by two to get byte index
23407 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23408
23409 // Duplicate to create 2 copies of byte index
23410 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23411 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23412
23413 // Add one to get alternate byte index
23414 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23415 }
23416 %}
23417 ins_pipe( pipe_slow );
23418 %}
23419
23420 instruct rearrangeS(vec dst, vec shuffle) %{
23421 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23422 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23423 match(Set dst (VectorRearrange dst shuffle));
23424 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23425 ins_encode %{
23426 assert(UseSSE >= 4, "required");
23427 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23428 %}
23429 ins_pipe( pipe_slow );
23430 %}
23431
23432 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23433 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23434 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23435 match(Set dst (VectorRearrange src shuffle));
23436 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23437 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23438 ins_encode %{
23439 assert(UseAVX >= 2, "required");
23440 // Swap src into vtmp1
23441 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23442 // Shuffle swapped src to get entries from other 128 bit lane
23443 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23444 // Shuffle original src to get entries from self 128 bit lane
23445 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23446 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23447 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23448 // Perform the blend
23449 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23450 %}
23451 ins_pipe( pipe_slow );
23452 %}
23453
23454 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23455 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23456 VM_Version::supports_avx512bw());
23457 match(Set dst (VectorRearrange src shuffle));
23458 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23459 ins_encode %{
23460 int vlen_enc = vector_length_encoding(this);
23461 if (!VM_Version::supports_avx512vl()) {
23462 vlen_enc = Assembler::AVX_512bit;
23463 }
23464 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23465 %}
23466 ins_pipe( pipe_slow );
23467 %}
23468
23469 // LoadShuffle/Rearrange for Integer and Float
23470
23471 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23472 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23473 Matcher::vector_length(n) == 4 && UseAVX == 0);
23474 match(Set dst (VectorLoadShuffle src));
23475 effect(TEMP dst, TEMP vtmp);
23476 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23477 ins_encode %{
23478 assert(UseSSE >= 4, "required");
23479
    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
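    // E.g. an int shuffle index of 2 becomes the byte quadruple {8, 9, 10, 11}.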
23482
23483 // Duplicate and multiply each shuffle by 4
23484 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23485 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23486 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23487 __ psllw($vtmp$$XMMRegister, 2);
23488
23489 // Duplicate again to create 4 copies of byte index
23490 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23491 __ psllw($dst$$XMMRegister, 8);
23492 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23493
23494 // Add 3,2,1,0 to get alternate byte index
23495 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23496 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23497 %}
23498 ins_pipe( pipe_slow );
23499 %}
23500
23501 instruct rearrangeI(vec dst, vec shuffle) %{
23502 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23503 UseAVX == 0);
23504 match(Set dst (VectorRearrange dst shuffle));
23505 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23506 ins_encode %{
23507 assert(UseSSE >= 4, "required");
23508 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23509 %}
23510 ins_pipe( pipe_slow );
23511 %}
23512
23513 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23514 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23515 UseAVX > 0);
23516 match(Set dst (VectorRearrange src shuffle));
23517 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23518 ins_encode %{
23519 int vlen_enc = vector_length_encoding(this);
23520 BasicType bt = Matcher::vector_element_basic_type(this);
23521 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23522 %}
23523 ins_pipe( pipe_slow );
23524 %}
23525
23526 // LoadShuffle/Rearrange for Long and Double
23527
23528 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23529 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23530 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23531 match(Set dst (VectorLoadShuffle src));
23532 effect(TEMP dst, TEMP vtmp);
23533 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23534 ins_encode %{
23535 assert(UseAVX >= 2, "required");
23536
23537 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms.
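    // E.g. a long shuffle index of 1 becomes the double word pair {2, 3}.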
23540
23541 // Multiply each shuffle by two to get double word index
23542 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23543
23544 // Duplicate each double word shuffle
23545 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23546 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23547
23548 // Add one to get alternate double word index
23549 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23550 %}
23551 ins_pipe( pipe_slow );
23552 %}
23553
23554 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23555 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23556 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23557 match(Set dst (VectorRearrange src shuffle));
23558 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23559 ins_encode %{
23560 assert(UseAVX >= 2, "required");
23561
23562 int vlen_enc = vector_length_encoding(this);
23563 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23564 %}
23565 ins_pipe( pipe_slow );
23566 %}
23567
23568 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23569 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23570 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23571 match(Set dst (VectorRearrange src shuffle));
23572 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23573 ins_encode %{
23574 assert(UseAVX > 2, "required");
23575
23576 int vlen_enc = vector_length_encoding(this);
23577 if (vlen_enc == Assembler::AVX_128bit) {
23578 vlen_enc = Assembler::AVX_256bit;
23579 }
23580 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23581 %}
23582 ins_pipe( pipe_slow );
23583 %}
23584
23585 // --------------------------------- FMA --------------------------------------
23586 // a * b + c
23587
23588 instruct vfmaF_reg(vec a, vec b, vec c) %{
23589 match(Set c (FmaVF c (Binary a b)));
23590 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23591 ins_cost(150);
23592 ins_encode %{
23593 assert(UseFMA, "not enabled");
23594 int vlen_enc = vector_length_encoding(this);
23595 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23596 %}
23597 ins_pipe( pipe_slow );
23598 %}
23599
23600 instruct vfmaF_mem(vec a, memory b, vec c) %{
23601 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23602 match(Set c (FmaVF c (Binary a (LoadVector b))));
23603 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23604 ins_cost(150);
23605 ins_encode %{
23606 assert(UseFMA, "not enabled");
23607 int vlen_enc = vector_length_encoding(this);
23608 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23609 %}
23610 ins_pipe( pipe_slow );
23611 %}
23612
23613 instruct vfmaD_reg(vec a, vec b, vec c) %{
23614 match(Set c (FmaVD c (Binary a b)));
23615 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23616 ins_cost(150);
23617 ins_encode %{
23618 assert(UseFMA, "not enabled");
23619 int vlen_enc = vector_length_encoding(this);
23620 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23621 %}
23622 ins_pipe( pipe_slow );
23623 %}
23624
23625 instruct vfmaD_mem(vec a, memory b, vec c) %{
23626 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23627 match(Set c (FmaVD c (Binary a (LoadVector b))));
23628 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23629 ins_cost(150);
23630 ins_encode %{
23631 assert(UseFMA, "not enabled");
23632 int vlen_enc = vector_length_encoding(this);
23633 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23634 %}
23635 ins_pipe( pipe_slow );
23636 %}
23637
23638 // --------------------------------- Vector Multiply Add --------------------------------------
23639
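// pmaddwd multiplies adjacent signed 16-bit elements and sums each pair of
// products into a 32-bit lane: dst[i] = src1[2i]*src2[2i] + src1[2i+1]*src2[2i+1].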
23640 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23641 predicate(UseAVX == 0);
23642 match(Set dst (MulAddVS2VI dst src1));
23643 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23644 ins_encode %{
23645 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23646 %}
23647 ins_pipe( pipe_slow );
23648 %}
23649
23650 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23651 predicate(UseAVX > 0);
23652 match(Set dst (MulAddVS2VI src1 src2));
23653 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23654 ins_encode %{
23655 int vlen_enc = vector_length_encoding(this);
23656 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23657 %}
23658 ins_pipe( pipe_slow );
23659 %}
23660
23661 // --------------------------------- Vector Multiply Add Add ----------------------------------
23662
23663 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23664 predicate(VM_Version::supports_avx512_vnni());
23665 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23666 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23667 ins_encode %{
23668 assert(UseAVX > 2, "required");
23669 int vlen_enc = vector_length_encoding(this);
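    // AVX512_VNNI evpdpwssd fuses the pmaddwd with the accumulate:
    // $dst[i] += $src1[2i]*$src2[2i] + $src1[2i+1]*$src2[2i+1].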
23670 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23671 %}
23672 ins_pipe( pipe_slow );
23673 ins_cost(10);
23674 %}
23675
23676 // --------------------------------- PopCount --------------------------------------
23677
23678 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23679 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23680 match(Set dst (PopCountVI src));
23681 match(Set dst (PopCountVL src));
23682 format %{ "vector_popcount_integral $dst, $src" %}
23683 ins_encode %{
23684 int opcode = this->ideal_Opcode();
23685 int vlen_enc = vector_length_encoding(this, $src);
23686 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23687 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23688 %}
23689 ins_pipe( pipe_slow );
23690 %}
23691
23692 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23693 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23694 match(Set dst (PopCountVI src mask));
23695 match(Set dst (PopCountVL src mask));
23696 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23697 ins_encode %{
23698 int vlen_enc = vector_length_encoding(this, $src);
23699 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23700 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23701 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23702 %}
23703 ins_pipe( pipe_slow );
23704 %}
23705
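// Without native vector popcount support (is_vector_popcount_predicate), the
// count is computed by the macro assembler, classically via a pshufb-based
// nibble lookup table.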
23706 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23707 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23708 match(Set dst (PopCountVI src));
23709 match(Set dst (PopCountVL src));
23710 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23711 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23712 ins_encode %{
23713 int opcode = this->ideal_Opcode();
23714 int vlen_enc = vector_length_encoding(this, $src);
23715 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23716 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23717 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23718 %}
23719 ins_pipe( pipe_slow );
23720 %}
23721
23722 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23723
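// Trailing zeros can be derived from leading-zero counts via the identity
// tzcnt(x) == bits - lzcnt(~x & (x - 1)); the macro-assembler routines below
// build on per-lane lzcnt where no direct tzcnt instruction exists.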
23724 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23725 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23726 Matcher::vector_length_in_bytes(n->in(1))));
23727 match(Set dst (CountTrailingZerosV src));
23728 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23729 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23731 ins_encode %{
23732 int vlen_enc = vector_length_encoding(this, $src);
23733 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23734 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23735 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23736 %}
23737 ins_pipe( pipe_slow );
23738 %}
23739
23740 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23741 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23742 VM_Version::supports_avx512cd() &&
23743 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23744 match(Set dst (CountTrailingZerosV src));
23745 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23746 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23748 ins_encode %{
23749 int vlen_enc = vector_length_encoding(this, $src);
23750 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23751 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23752 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23753 %}
23754 ins_pipe( pipe_slow );
23755 %}
23756
23757 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23758 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23759 match(Set dst (CountTrailingZerosV src));
23760 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23761 ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23763 ins_encode %{
23764 int vlen_enc = vector_length_encoding(this, $src);
23765 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23766 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23767 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23768 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23769 %}
23770 ins_pipe( pipe_slow );
23771 %}
23772
23773 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23774 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23775 match(Set dst (CountTrailingZerosV src));
23776 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23777 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23778 ins_encode %{
23779 int vlen_enc = vector_length_encoding(this, $src);
23780 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23781 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23782 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23783 %}
23784 ins_pipe( pipe_slow );
23785 %}
23786
23787
23788 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23789
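// vpternlogd evaluates an arbitrary three-input boolean function of its
// operands bit-wise; the 8-bit immediate is the truth table indexed by the
// (dst, src2, src3) bit triple. E.g. func == 0xCA is the bitwise select
// (dst & src2) | (~dst & src3).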
23790 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23791 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23792 effect(TEMP dst);
23793 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23794 ins_encode %{
23795 int vector_len = vector_length_encoding(this);
23796 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23797 %}
23798 ins_pipe( pipe_slow );
23799 %}
23800
23801 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23802 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23803 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23804 effect(TEMP dst);
23805 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23806 ins_encode %{
23807 int vector_len = vector_length_encoding(this);
23808 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23809 %}
23810 ins_pipe( pipe_slow );
23811 %}
23812
23813 // --------------------------------- Rotation Operations ----------------------------------
23814 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23815 match(Set dst (RotateLeftV src shift));
23816 match(Set dst (RotateRightV src shift));
23817 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23818 ins_encode %{
23819 int opcode = this->ideal_Opcode();
23820 int vector_len = vector_length_encoding(this);
23821 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23822 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23823 %}
23824 ins_pipe( pipe_slow );
23825 %}
23826
instruct vprotate_var(vec dst, vec src, vec shift) %{
23828 match(Set dst (RotateLeftV src shift));
23829 match(Set dst (RotateRightV src shift));
23830 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23831 ins_encode %{
23832 int opcode = this->ideal_Opcode();
23833 int vector_len = vector_length_encoding(this);
23834 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23835 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23836 %}
23837 ins_pipe( pipe_slow );
23838 %}
23839
23840 // ---------------------------------- Masked Operations ------------------------------------
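// Masked loads zero the lanes whose mask bit is clear; masked stores leave
// the corresponding memory untouched. Scalar-equivalent sketch:
//   load:  dst[i] = mask[i] ? mem[i] : 0;
//   store: if (mask[i]) mem[i] = src[i];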
23841 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23842 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23843 match(Set dst (LoadVectorMasked mem mask));
23844 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23845 ins_encode %{
23846 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23847 int vlen_enc = vector_length_encoding(this);
23848 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23849 %}
23850 ins_pipe( pipe_slow );
23851 %}
23852
23854 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23855 predicate(n->in(3)->bottom_type()->isa_vectmask());
23856 match(Set dst (LoadVectorMasked mem mask));
23857 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23858 ins_encode %{
23859 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23860 int vector_len = vector_length_encoding(this);
23861 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23862 %}
23863 ins_pipe( pipe_slow );
23864 %}
23865
23866 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23867 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23868 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23869 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23870 ins_encode %{
23871 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23872 int vlen_enc = vector_length_encoding(src_node);
23873 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23874 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23875 %}
23876 ins_pipe( pipe_slow );
23877 %}
23878
23879 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23880 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23881 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23882 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23883 ins_encode %{
23884 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23885 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23886 int vlen_enc = vector_length_encoding(src_node);
23887 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23888 %}
23889 ins_pipe( pipe_slow );
23890 %}
23891
23892 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23893 match(Set addr (VerifyVectorAlignment addr mask));
23894 effect(KILL cr);
23895 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23896 ins_encode %{
23897 Label Lskip;
23898 // check if masked bits of addr are zero
23899 __ testq($addr$$Register, $mask$$constant);
23900 __ jccb(Assembler::equal, Lskip);
23901 __ stop("verify_vector_alignment found a misaligned vector memory access");
23902 __ bind(Lskip);
23903 %}
23904 ins_pipe(pipe_slow);
23905 %}
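
// The matched $mask constant is expected to be alignment-1, so e.g. a
// 64-byte alignment check uses mask 0x3F: testq traps into the stop() above
// whenever any of those low address bits is set.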
23906
23907 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23908 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23909 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23910 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23911 ins_encode %{
23912 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23913 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23914
23915 Label DONE;
23916 int vlen_enc = vector_length_encoding(this, $src1);
23917 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23918
23919 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23920 __ mov64($dst$$Register, -1L);
23921 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23922 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23923 __ jccb(Assembler::carrySet, DONE);
23924 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23925 __ notq($dst$$Register);
23926 __ tzcntq($dst$$Register, $dst$$Register);
23927 __ bind(DONE);
23928 %}
23929 ins_pipe( pipe_slow );
23930 %}
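
// Result convention of the sequence above: $dst is -1 when every lane
// selected by $mask compares equal; kortest sets CF only when the OR of the
// two mask registers covers all bits, i.e. each lane is either inactive or
// equal. Otherwise tzcnt of the inverted compare mask yields the first
// failing lane (the masks used here are prefix masks, see VectorMaskGen).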
23931
23933 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23934 match(Set dst (VectorMaskGen len));
23935 effect(TEMP temp, KILL cr);
23936 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23937 ins_encode %{
23938 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23939 %}
23940 ins_pipe( pipe_slow );
23941 %}
23942
23943 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23944 match(Set dst (VectorMaskGen len));
23945 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23946 effect(TEMP temp);
23947 ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23949 __ kmovql($dst$$KRegister, $temp$$Register);
23950 %}
23951 ins_pipe( pipe_slow );
23952 %}
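
// Example: $len = 5 yields 0xFFFFFFFFFFFFFFFF >> 59 = 0x1F, a mask with the
// low five lanes active.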
23953
23954 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23955 predicate(n->in(1)->bottom_type()->isa_vectmask());
23956 match(Set dst (VectorMaskToLong mask));
23957 effect(TEMP dst, KILL cr);
23958 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23959 ins_encode %{
23960 int opcode = this->ideal_Opcode();
23961 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23962 int mask_len = Matcher::vector_length(this, $mask);
23963 int mask_size = mask_len * type2aelembytes(mbt);
23964 int vlen_enc = vector_length_encoding(this, $mask);
23965 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23966 $dst$$Register, mask_len, mask_size, vlen_enc);
23967 %}
23968 ins_pipe( pipe_slow );
23969 %}
23970
23971 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23972 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23973 match(Set dst (VectorMaskToLong mask));
23974 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23975 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23976 ins_encode %{
23977 int opcode = this->ideal_Opcode();
23978 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23979 int mask_len = Matcher::vector_length(this, $mask);
23980 int vlen_enc = vector_length_encoding(this, $mask);
23981 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23982 $dst$$Register, mask_len, mbt, vlen_enc);
23983 %}
23984 ins_pipe( pipe_slow );
23985 %}
23986
23987 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
23988 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
23989 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
23990 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
23991 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23992 ins_encode %{
23993 int opcode = this->ideal_Opcode();
23994 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23995 int mask_len = Matcher::vector_length(this, $mask);
23996 int vlen_enc = vector_length_encoding(this, $mask);
23997 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23998 $dst$$Register, mask_len, mbt, vlen_enc);
23999 %}
24000 ins_pipe( pipe_slow );
24001 %}
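
// VectorMaskToLong packs lane i of the mask into bit i of the result long,
// so a four-lane mask {true, false, true, true} becomes 0b1101 = 13.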
24002
24003 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24004 predicate(n->in(1)->bottom_type()->isa_vectmask());
24005 match(Set dst (VectorMaskTrueCount mask));
24006 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24007 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24008 ins_encode %{
24009 int opcode = this->ideal_Opcode();
24010 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24011 int mask_len = Matcher::vector_length(this, $mask);
24012 int mask_size = mask_len * type2aelembytes(mbt);
24013 int vlen_enc = vector_length_encoding(this, $mask);
24014 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24015 $tmp$$Register, mask_len, mask_size, vlen_enc);
24016 %}
24017 ins_pipe( pipe_slow );
24018 %}
24019
24020 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24021 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24022 match(Set dst (VectorMaskTrueCount mask));
24023 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24024 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24025 ins_encode %{
24026 int opcode = this->ideal_Opcode();
24027 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24028 int mask_len = Matcher::vector_length(this, $mask);
24029 int vlen_enc = vector_length_encoding(this, $mask);
24030 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24031 $tmp$$Register, mask_len, mbt, vlen_enc);
24032 %}
24033 ins_pipe( pipe_slow );
24034 %}
24035
24036 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24037 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24038 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24039 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24040 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24041 ins_encode %{
24042 int opcode = this->ideal_Opcode();
24043 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24044 int mask_len = Matcher::vector_length(this, $mask);
24045 int vlen_enc = vector_length_encoding(this, $mask);
24046 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24047 $tmp$$Register, mask_len, mbt, vlen_enc);
24048 %}
24049 ins_pipe( pipe_slow );
24050 %}
24051
24052 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24053 predicate(n->in(1)->bottom_type()->isa_vectmask());
24054 match(Set dst (VectorMaskFirstTrue mask));
24055 match(Set dst (VectorMaskLastTrue mask));
24056 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24057 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24058 ins_encode %{
24059 int opcode = this->ideal_Opcode();
24060 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24061 int mask_len = Matcher::vector_length(this, $mask);
24062 int mask_size = mask_len * type2aelembytes(mbt);
24063 int vlen_enc = vector_length_encoding(this, $mask);
24064 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24065 $tmp$$Register, mask_len, mask_size, vlen_enc);
24066 %}
24067 ins_pipe( pipe_slow );
24068 %}
24069
24070 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24071 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24072 match(Set dst (VectorMaskFirstTrue mask));
24073 match(Set dst (VectorMaskLastTrue mask));
24074 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24075 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24076 ins_encode %{
24077 int opcode = this->ideal_Opcode();
24078 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24079 int mask_len = Matcher::vector_length(this, $mask);
24080 int vlen_enc = vector_length_encoding(this, $mask);
24081 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24082 $tmp$$Register, mask_len, mbt, vlen_enc);
24083 %}
24084 ins_pipe( pipe_slow );
24085 %}
24086
24087 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24088 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24089 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24090 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24091 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24092 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24093 ins_encode %{
24094 int opcode = this->ideal_Opcode();
24095 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24096 int mask_len = Matcher::vector_length(this, $mask);
24097 int vlen_enc = vector_length_encoding(this, $mask);
24098 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24099 $tmp$$Register, mask_len, mbt, vlen_enc);
24100 %}
24101 ins_pipe( pipe_slow );
24102 %}
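
// Mask query semantics, mirroring the Java Vector API contract:
// VectorMaskTrueCount is the population count of the mask,
// VectorMaskFirstTrue is the index of the lowest set lane (or the vector
// length when no lane is set), and VectorMaskLastTrue is the index of the
// highest set lane (or -1 when no lane is set).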
24103
24104 // --------------------------------- Compress/Expand Operations ---------------------------
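// Illustrative example for an 8-lane vector with mask 0b00001101 (zero-
// masking clears the unselected destination lanes):
//   compress: dst = { src[0], src[2], src[3], 0, 0, 0, 0, 0 }
//   expand:   dst = { src[0], 0, src[1], src[2], 0, 0, 0, 0 }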
24105 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24106 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24107 match(Set dst (CompressV src mask));
24108 match(Set dst (ExpandV src mask));
24109 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24111 ins_encode %{
24112 int opcode = this->ideal_Opcode();
24113 int vlen_enc = vector_length_encoding(this);
24114 BasicType bt = Matcher::vector_element_basic_type(this);
24115 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24116 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24117 %}
24118 ins_pipe( pipe_slow );
24119 %}
24120
24121 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24122 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24123 match(Set dst (CompressV src mask));
24124 match(Set dst (ExpandV src mask));
24125 format %{ "vector_compress_expand $dst, $src, $mask" %}
24126 ins_encode %{
24127 int opcode = this->ideal_Opcode();
24128 int vector_len = vector_length_encoding(this);
24129 BasicType bt = Matcher::vector_element_basic_type(this);
24130 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24131 %}
24132 ins_pipe( pipe_slow );
24133 %}
24134
24135 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24136 match(Set dst (CompressM mask));
24137 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24138 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24139 ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "mask input expected");
24141 int mask_len = Matcher::vector_length(this);
24142 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24143 %}
24144 ins_pipe( pipe_slow );
24145 %}
24146
24147 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24148
24149 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24150 predicate(!VM_Version::supports_gfni());
24151 match(Set dst (ReverseV src));
24152 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24154 ins_encode %{
24155 int vec_enc = vector_length_encoding(this);
24156 BasicType bt = Matcher::vector_element_basic_type(this);
24157 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24158 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24159 %}
24160 ins_pipe( pipe_slow );
24161 %}
24162
24163 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24164 predicate(VM_Version::supports_gfni());
24165 match(Set dst (ReverseV src));
24166 effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24168 ins_encode %{
24169 int vec_enc = vector_length_encoding(this);
24170 BasicType bt = Matcher::vector_element_basic_type(this);
24171 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24172 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24173 $xtmp$$XMMRegister);
24174 %}
24175 ins_pipe( pipe_slow );
24176 %}
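
// The 64-bit constant above encodes the 8x8 GF(2) matrix operand of
// VGF2P8AFFINEQB that reverses the bit order within each byte; reordering
// the bytes of wider lanes is handled inside the macro-assembler routine.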
24177
24178 instruct vreverse_byte_reg(vec dst, vec src) %{
24179 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24180 match(Set dst (ReverseBytesV src));
24181 effect(TEMP dst);
24182 format %{ "vector_reverse_byte $dst, $src" %}
24183 ins_encode %{
24184 int vec_enc = vector_length_encoding(this);
24185 BasicType bt = Matcher::vector_element_basic_type(this);
24186 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24187 %}
24188 ins_pipe( pipe_slow );
24189 %}
24190
24191 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24192 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24193 match(Set dst (ReverseBytesV src));
24194 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24196 ins_encode %{
24197 int vec_enc = vector_length_encoding(this);
24198 BasicType bt = Matcher::vector_element_basic_type(this);
24199 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24200 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24201 %}
24202 ins_pipe( pipe_slow );
24203 %}
24204
24205 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24206
24207 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24208 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24209 Matcher::vector_length_in_bytes(n->in(1))));
24210 match(Set dst (CountLeadingZerosV src));
24211 format %{ "vector_count_leading_zeros $dst, $src" %}
24212 ins_encode %{
24213 int vlen_enc = vector_length_encoding(this, $src);
24214 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24215 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24216 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24217 %}
24218 ins_pipe( pipe_slow );
24219 %}
24220
24221 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24222 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24223 Matcher::vector_length_in_bytes(n->in(1))));
24224 match(Set dst (CountLeadingZerosV src mask));
24225 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24226 ins_encode %{
24227 int vlen_enc = vector_length_encoding(this, $src);
24228 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24229 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24230 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24231 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24232 %}
24233 ins_pipe( pipe_slow );
24234 %}
24235
24236 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24237 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24238 VM_Version::supports_avx512cd() &&
24239 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24240 match(Set dst (CountLeadingZerosV src));
24241 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24243 ins_encode %{
24244 int vlen_enc = vector_length_encoding(this, $src);
24245 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24246 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24247 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24248 %}
24249 ins_pipe( pipe_slow );
24250 %}
24251
24252 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24253 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24254 match(Set dst (CountLeadingZerosV src));
24255 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24257 ins_encode %{
24258 int vlen_enc = vector_length_encoding(this, $src);
24259 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24260 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24261 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24262 $rtmp$$Register, true, vlen_enc);
24263 %}
24264 ins_pipe( pipe_slow );
24265 %}
24266
24267 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24268 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24269 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24270 match(Set dst (CountLeadingZerosV src));
24271 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24272 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24273 ins_encode %{
24274 int vlen_enc = vector_length_encoding(this, $src);
24275 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24276 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24277 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24278 %}
24279 ins_pipe( pipe_slow );
24280 %}
24281
24282 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24283 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24284 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24285 match(Set dst (CountLeadingZerosV src));
24286 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24287 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24288 ins_encode %{
24289 int vlen_enc = vector_length_encoding(this, $src);
24290 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24291 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24292 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24293 %}
24294 ins_pipe( pipe_slow );
24295 %}
24296
24297 // ---------------------------------- Vector Masked Operations ------------------------------------
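// These patterns use EVEX merge-masking: lanes whose mask bit is clear keep
// the previous contents of $dst (the boolean passed to evmasked_op selects
// merge- rather than zero-masking). Scalar-equivalent sketch for the masked
// add below:
//   dst[i] = mask[i] ? dst[i] + src2[i] : dst[i];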
24298
24299 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24300 match(Set dst (AddVB (Binary dst src2) mask));
24301 match(Set dst (AddVS (Binary dst src2) mask));
24302 match(Set dst (AddVI (Binary dst src2) mask));
24303 match(Set dst (AddVL (Binary dst src2) mask));
24304 match(Set dst (AddVF (Binary dst src2) mask));
24305 match(Set dst (AddVD (Binary dst src2) mask));
24306 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24307 ins_encode %{
24308 int vlen_enc = vector_length_encoding(this);
24309 BasicType bt = Matcher::vector_element_basic_type(this);
24310 int opc = this->ideal_Opcode();
24311 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24312 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24313 %}
24314 ins_pipe( pipe_slow );
24315 %}
24316
24317 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24318 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24319 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24320 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24321 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24322 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24323 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24324 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24325 ins_encode %{
24326 int vlen_enc = vector_length_encoding(this);
24327 BasicType bt = Matcher::vector_element_basic_type(this);
24328 int opc = this->ideal_Opcode();
24329 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24330 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24331 %}
24332 ins_pipe( pipe_slow );
24333 %}
24334
24335 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24336 match(Set dst (XorV (Binary dst src2) mask));
24337 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24338 ins_encode %{
24339 int vlen_enc = vector_length_encoding(this);
24340 BasicType bt = Matcher::vector_element_basic_type(this);
24341 int opc = this->ideal_Opcode();
24342 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24343 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24344 %}
24345 ins_pipe( pipe_slow );
24346 %}
24347
24348 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24349 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24350 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24351 ins_encode %{
24352 int vlen_enc = vector_length_encoding(this);
24353 BasicType bt = Matcher::vector_element_basic_type(this);
24354 int opc = this->ideal_Opcode();
24355 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24356 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24357 %}
24358 ins_pipe( pipe_slow );
24359 %}
24360
24361 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24362 match(Set dst (OrV (Binary dst src2) mask));
24363 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24364 ins_encode %{
24365 int vlen_enc = vector_length_encoding(this);
24366 BasicType bt = Matcher::vector_element_basic_type(this);
24367 int opc = this->ideal_Opcode();
24368 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24369 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24370 %}
24371 ins_pipe( pipe_slow );
24372 %}
24373
24374 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24375 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24376 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24377 ins_encode %{
24378 int vlen_enc = vector_length_encoding(this);
24379 BasicType bt = Matcher::vector_element_basic_type(this);
24380 int opc = this->ideal_Opcode();
24381 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24382 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24383 %}
24384 ins_pipe( pipe_slow );
24385 %}
24386
24387 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24388 match(Set dst (AndV (Binary dst src2) mask));
24389 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24390 ins_encode %{
24391 int vlen_enc = vector_length_encoding(this);
24392 BasicType bt = Matcher::vector_element_basic_type(this);
24393 int opc = this->ideal_Opcode();
24394 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24395 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24396 %}
24397 ins_pipe( pipe_slow );
24398 %}
24399
24400 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24401 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24402 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24403 ins_encode %{
24404 int vlen_enc = vector_length_encoding(this);
24405 BasicType bt = Matcher::vector_element_basic_type(this);
24406 int opc = this->ideal_Opcode();
24407 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24408 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24409 %}
24410 ins_pipe( pipe_slow );
24411 %}
24412
24413 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24414 match(Set dst (SubVB (Binary dst src2) mask));
24415 match(Set dst (SubVS (Binary dst src2) mask));
24416 match(Set dst (SubVI (Binary dst src2) mask));
24417 match(Set dst (SubVL (Binary dst src2) mask));
24418 match(Set dst (SubVF (Binary dst src2) mask));
24419 match(Set dst (SubVD (Binary dst src2) mask));
24420 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24421 ins_encode %{
24422 int vlen_enc = vector_length_encoding(this);
24423 BasicType bt = Matcher::vector_element_basic_type(this);
24424 int opc = this->ideal_Opcode();
24425 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24426 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24427 %}
24428 ins_pipe( pipe_slow );
24429 %}
24430
24431 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24432 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24433 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24434 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24435 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24436 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24437 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24438 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24439 ins_encode %{
24440 int vlen_enc = vector_length_encoding(this);
24441 BasicType bt = Matcher::vector_element_basic_type(this);
24442 int opc = this->ideal_Opcode();
24443 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24444 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24445 %}
24446 ins_pipe( pipe_slow );
24447 %}
24448
24449 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24450 match(Set dst (MulVS (Binary dst src2) mask));
24451 match(Set dst (MulVI (Binary dst src2) mask));
24452 match(Set dst (MulVL (Binary dst src2) mask));
24453 match(Set dst (MulVF (Binary dst src2) mask));
24454 match(Set dst (MulVD (Binary dst src2) mask));
24455 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24456 ins_encode %{
24457 int vlen_enc = vector_length_encoding(this);
24458 BasicType bt = Matcher::vector_element_basic_type(this);
24459 int opc = this->ideal_Opcode();
24460 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24461 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24462 %}
24463 ins_pipe( pipe_slow );
24464 %}
24465
24466 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24467 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24468 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24469 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24470 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24471 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24472 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24473 ins_encode %{
24474 int vlen_enc = vector_length_encoding(this);
24475 BasicType bt = Matcher::vector_element_basic_type(this);
24476 int opc = this->ideal_Opcode();
24477 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24478 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24479 %}
24480 ins_pipe( pipe_slow );
24481 %}
24482
24483 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24484 match(Set dst (SqrtVF dst mask));
24485 match(Set dst (SqrtVD dst mask));
24486 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24487 ins_encode %{
24488 int vlen_enc = vector_length_encoding(this);
24489 BasicType bt = Matcher::vector_element_basic_type(this);
24490 int opc = this->ideal_Opcode();
24491 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24492 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24493 %}
24494 ins_pipe( pipe_slow );
24495 %}
24496
24497 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24498 match(Set dst (DivVF (Binary dst src2) mask));
24499 match(Set dst (DivVD (Binary dst src2) mask));
24500 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24501 ins_encode %{
24502 int vlen_enc = vector_length_encoding(this);
24503 BasicType bt = Matcher::vector_element_basic_type(this);
24504 int opc = this->ideal_Opcode();
24505 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24506 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24507 %}
24508 ins_pipe( pipe_slow );
24509 %}
24510
24511 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24512 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24513 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24514 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24515 ins_encode %{
24516 int vlen_enc = vector_length_encoding(this);
24517 BasicType bt = Matcher::vector_element_basic_type(this);
24518 int opc = this->ideal_Opcode();
24519 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24520 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24521 %}
24522 ins_pipe( pipe_slow );
24523 %}
24524
24526 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24527 match(Set dst (RotateLeftV (Binary dst shift) mask));
24528 match(Set dst (RotateRightV (Binary dst shift) mask));
24529 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24530 ins_encode %{
24531 int vlen_enc = vector_length_encoding(this);
24532 BasicType bt = Matcher::vector_element_basic_type(this);
24533 int opc = this->ideal_Opcode();
24534 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24535 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24536 %}
24537 ins_pipe( pipe_slow );
24538 %}
24539
24540 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24541 match(Set dst (RotateLeftV (Binary dst src2) mask));
24542 match(Set dst (RotateRightV (Binary dst src2) mask));
24543 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24544 ins_encode %{
24545 int vlen_enc = vector_length_encoding(this);
24546 BasicType bt = Matcher::vector_element_basic_type(this);
24547 int opc = this->ideal_Opcode();
24548 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24549 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24550 %}
24551 ins_pipe( pipe_slow );
24552 %}
24553
24554 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24555 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24556 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24557 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24558 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24559 ins_encode %{
24560 int vlen_enc = vector_length_encoding(this);
24561 BasicType bt = Matcher::vector_element_basic_type(this);
24562 int opc = this->ideal_Opcode();
24563 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24564 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24565 %}
24566 ins_pipe( pipe_slow );
24567 %}
24568
24569 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24570 predicate(!n->as_ShiftV()->is_var_shift());
24571 match(Set dst (LShiftVS (Binary dst src2) mask));
24572 match(Set dst (LShiftVI (Binary dst src2) mask));
24573 match(Set dst (LShiftVL (Binary dst src2) mask));
24574 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24575 ins_encode %{
24576 int vlen_enc = vector_length_encoding(this);
24577 BasicType bt = Matcher::vector_element_basic_type(this);
24578 int opc = this->ideal_Opcode();
24579 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24580 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24581 %}
24582 ins_pipe( pipe_slow );
24583 %}
24584
24585 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24586 predicate(n->as_ShiftV()->is_var_shift());
24587 match(Set dst (LShiftVS (Binary dst src2) mask));
24588 match(Set dst (LShiftVI (Binary dst src2) mask));
24589 match(Set dst (LShiftVL (Binary dst src2) mask));
24590 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24591 ins_encode %{
24592 int vlen_enc = vector_length_encoding(this);
24593 BasicType bt = Matcher::vector_element_basic_type(this);
24594 int opc = this->ideal_Opcode();
24595 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24596 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24597 %}
24598 ins_pipe( pipe_slow );
24599 %}
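
// The is_var_shift() predicate separates the two shift flavors: the
// non-variable form broadcasts a single count to every lane (vpsllw and
// friends), while the variable form shifts each lane by its own count taken
// from $src2 (vpsllvd and friends). The trailing boolean passed to
// evmasked_op selects between the two encodings.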
24600
24601 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24602 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24603 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24604 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24605 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24606 ins_encode %{
24607 int vlen_enc = vector_length_encoding(this);
24608 BasicType bt = Matcher::vector_element_basic_type(this);
24609 int opc = this->ideal_Opcode();
24610 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24611 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24612 %}
24613 ins_pipe( pipe_slow );
24614 %}
24615
24616 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24617 predicate(!n->as_ShiftV()->is_var_shift());
24618 match(Set dst (RShiftVS (Binary dst src2) mask));
24619 match(Set dst (RShiftVI (Binary dst src2) mask));
24620 match(Set dst (RShiftVL (Binary dst src2) mask));
24621 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24622 ins_encode %{
24623 int vlen_enc = vector_length_encoding(this);
24624 BasicType bt = Matcher::vector_element_basic_type(this);
24625 int opc = this->ideal_Opcode();
24626 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24627 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24628 %}
24629 ins_pipe( pipe_slow );
24630 %}
24631
24632 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24633 predicate(n->as_ShiftV()->is_var_shift());
24634 match(Set dst (RShiftVS (Binary dst src2) mask));
24635 match(Set dst (RShiftVI (Binary dst src2) mask));
24636 match(Set dst (RShiftVL (Binary dst src2) mask));
24637 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24638 ins_encode %{
24639 int vlen_enc = vector_length_encoding(this);
24640 BasicType bt = Matcher::vector_element_basic_type(this);
24641 int opc = this->ideal_Opcode();
24642 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24643 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24644 %}
24645 ins_pipe( pipe_slow );
24646 %}
24647
24648 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24649 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24650 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24651 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24652 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24653 ins_encode %{
24654 int vlen_enc = vector_length_encoding(this);
24655 BasicType bt = Matcher::vector_element_basic_type(this);
24656 int opc = this->ideal_Opcode();
24657 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24658 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24659 %}
24660 ins_pipe( pipe_slow );
24661 %}
24662
24663 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24664 predicate(!n->as_ShiftV()->is_var_shift());
24665 match(Set dst (URShiftVS (Binary dst src2) mask));
24666 match(Set dst (URShiftVI (Binary dst src2) mask));
24667 match(Set dst (URShiftVL (Binary dst src2) mask));
24668 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24669 ins_encode %{
24670 int vlen_enc = vector_length_encoding(this);
24671 BasicType bt = Matcher::vector_element_basic_type(this);
24672 int opc = this->ideal_Opcode();
24673 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24674 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24675 %}
24676 ins_pipe( pipe_slow );
24677 %}
24678
24679 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24680 predicate(n->as_ShiftV()->is_var_shift());
24681 match(Set dst (URShiftVS (Binary dst src2) mask));
24682 match(Set dst (URShiftVI (Binary dst src2) mask));
24683 match(Set dst (URShiftVL (Binary dst src2) mask));
24684 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24685 ins_encode %{
24686 int vlen_enc = vector_length_encoding(this);
24687 BasicType bt = Matcher::vector_element_basic_type(this);
24688 int opc = this->ideal_Opcode();
24689 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24690 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24691 %}
24692 ins_pipe( pipe_slow );
24693 %}
24694
24695 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24696 match(Set dst (MaxV (Binary dst src2) mask));
24697 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24698 ins_encode %{
24699 int vlen_enc = vector_length_encoding(this);
24700 BasicType bt = Matcher::vector_element_basic_type(this);
24701 int opc = this->ideal_Opcode();
24702 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24703 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24704 %}
24705 ins_pipe( pipe_slow );
24706 %}
24707
24708 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24709 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24710 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24711 ins_encode %{
24712 int vlen_enc = vector_length_encoding(this);
24713 BasicType bt = Matcher::vector_element_basic_type(this);
24714 int opc = this->ideal_Opcode();
24715 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24716 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24717 %}
24718 ins_pipe( pipe_slow );
24719 %}
24720
24721 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24722 match(Set dst (MinV (Binary dst src2) mask));
24723 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24724 ins_encode %{
24725 int vlen_enc = vector_length_encoding(this);
24726 BasicType bt = Matcher::vector_element_basic_type(this);
24727 int opc = this->ideal_Opcode();
24728 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24729 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24730 %}
24731 ins_pipe( pipe_slow );
24732 %}
24733
24734 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24735 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24736 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24737 ins_encode %{
24738 int vlen_enc = vector_length_encoding(this);
24739 BasicType bt = Matcher::vector_element_basic_type(this);
24740 int opc = this->ideal_Opcode();
24741 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24742 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24743 %}
24744 ins_pipe( pipe_slow );
24745 %}
24746
24747 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24748 match(Set dst (VectorRearrange (Binary dst src2) mask));
24749 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24750 ins_encode %{
24751 int vlen_enc = vector_length_encoding(this);
24752 BasicType bt = Matcher::vector_element_basic_type(this);
24753 int opc = this->ideal_Opcode();
24754 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24755 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24756 %}
24757 ins_pipe( pipe_slow );
24758 %}
24759
24760 instruct vabs_masked(vec dst, kReg mask) %{
24761 match(Set dst (AbsVB dst mask));
24762 match(Set dst (AbsVS dst mask));
24763 match(Set dst (AbsVI dst mask));
24764 match(Set dst (AbsVL dst mask));
24765 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24766 ins_encode %{
24767 int vlen_enc = vector_length_encoding(this);
24768 BasicType bt = Matcher::vector_element_basic_type(this);
24769 int opc = this->ideal_Opcode();
24770 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24771 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24772 %}
24773 ins_pipe( pipe_slow );
24774 %}
24775
24776 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24777 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24778 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24779 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24780 ins_encode %{
    assert(UseFMA, "FMA instruction support is required.");
24782 int vlen_enc = vector_length_encoding(this);
24783 BasicType bt = Matcher::vector_element_basic_type(this);
24784 int opc = this->ideal_Opcode();
24785 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24786 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24787 %}
24788 ins_pipe( pipe_slow );
24789 %}
24790
24791 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24792 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24793 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24794 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24795 ins_encode %{
    assert(UseFMA, "FMA instruction support is required.");
24797 int vlen_enc = vector_length_encoding(this);
24798 BasicType bt = Matcher::vector_element_basic_type(this);
24799 int opc = this->ideal_Opcode();
24800 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24801 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24802 %}
24803 ins_pipe( pipe_slow );
24804 %}
24805
24806 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24807 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24808 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24809 ins_encode %{
24810 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24811 int vlen_enc = vector_length_encoding(this, $src1);
24812 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24813
    // Dispatch the masked compare on the element type of src1.
24815 switch (src1_elem_bt) {
24816 case T_BYTE: {
24817 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24818 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24819 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24820 break;
24821 }
24822 case T_SHORT: {
24823 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24824 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24825 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24826 break;
24827 }
24828 case T_INT: {
24829 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24830 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24831 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24832 break;
24833 }
24834 case T_LONG: {
24835 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24836 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24837 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24838 break;
24839 }
24840 case T_FLOAT: {
24841 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24842 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24843 break;
24844 }
24845 case T_DOUBLE: {
24846 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24847 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24848 break;
24849 }
24850 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24851 }
24852 %}
24853 ins_pipe( pipe_slow );
24854 %}
24855
24856 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24857 predicate(Matcher::vector_length(n) <= 32);
24858 match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
24860 ins_encode %{
24861 int mask_len = Matcher::vector_length(this);
24862 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24863 %}
24864 ins_pipe( pipe_slow );
24865 %}
24866
24867 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24868 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24869 match(Set dst (XorVMask src (MaskAll cnt)));
24870 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24872 ins_encode %{
24873 uint masklen = Matcher::vector_length(this);
24874 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24875 %}
24876 ins_pipe( pipe_slow );
24877 %}
24878
24879 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24880 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24881 (Matcher::vector_length(n) == 16) ||
24882 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24883 match(Set dst (XorVMask src (MaskAll cnt)));
24884 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24885 ins_encode %{
24886 uint masklen = Matcher::vector_length(this);
24887 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24888 %}
24889 ins_pipe( pipe_slow );
24890 %}
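
// XorVMask with MaskAll(-1) is how the IR spells mask negation: XOR against
// an all-ones mask flips every lane, so both patterns lower to knot.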
24891
24892 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24893 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24894 match(Set dst (VectorLongToMask src));
24895 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24896 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24897 ins_encode %{
24898 int mask_len = Matcher::vector_length(this);
24899 int vec_enc = vector_length_encoding(mask_len);
24900 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24901 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24902 %}
24903 ins_pipe( pipe_slow );
24904 %}
24905
24907 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24908 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24909 match(Set dst (VectorLongToMask src));
24910 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
24912 ins_encode %{
24913 int mask_len = Matcher::vector_length(this);
24914 assert(mask_len <= 32, "invalid mask length");
24915 int vec_enc = vector_length_encoding(mask_len);
24916 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24917 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24918 %}
24919 ins_pipe( pipe_slow );
24920 %}
24921
24922 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24923 predicate(n->bottom_type()->isa_vectmask());
24924 match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src" %}
24926 ins_encode %{
24927 __ kmov($dst$$KRegister, $src$$Register);
24928 %}
24929 ins_pipe( pipe_slow );
24930 %}
24931
24932 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24933 match(Set dst (AndVMask src1 src2));
24934 match(Set dst (OrVMask src1 src2));
24935 match(Set dst (XorVMask src1 src2));
24936 effect(TEMP kscratch);
24937 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24938 ins_encode %{
24939 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24940 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24941 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24942 uint masklen = Matcher::vector_length(this);
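    // Byte-granularity mask instructions (kandb/korb/kxorb) require
    // AVX512DQ; without it, widen sub-16-bit operations to word granularity.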
24943 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24944 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24945 %}
24946 ins_pipe( pipe_slow );
24947 %}
24948
24949 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24950 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24951 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24952 ins_encode %{
24953 int vlen_enc = vector_length_encoding(this);
24954 BasicType bt = Matcher::vector_element_basic_type(this);
24955 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24956 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24957 %}
24958 ins_pipe( pipe_slow );
24959 %}
24960
24961 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24962 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24963 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24964 ins_encode %{
24965 int vlen_enc = vector_length_encoding(this);
24966 BasicType bt = Matcher::vector_element_basic_type(this);
24967 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24968 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24969 %}
24970 ins_pipe( pipe_slow );
24971 %}
24972
24973 instruct castMM(kReg dst)
24974 %{
24975 match(Set dst (CastVV dst));
24976
24977 size(0);
  format %{ "# castMM of $dst" %}
24979 ins_encode(/* empty encoding */);
24980 ins_cost(0);
24981 ins_pipe(empty);
24982 %}
24983
24984 instruct castVV(vec dst)
24985 %{
24986 match(Set dst (CastVV dst));
24987
24988 size(0);
24989 format %{ "# castVV of $dst" %}
24990 ins_encode(/* empty encoding */);
24991 ins_cost(0);
24992 ins_pipe(empty);
24993 %}
24994
24995 instruct castVVLeg(legVec dst)
24996 %{
24997 match(Set dst (CastVV dst));
24998
24999 size(0);
25000 format %{ "# castVV of $dst" %}
25001 ins_encode(/* empty encoding */);
25002 ins_cost(0);
25003 ins_pipe(empty);
25004 %}
25005
25006 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25007 %{
25008 match(Set dst (IsInfiniteF src));
25009 effect(TEMP ktmp, KILL cr);
25010 format %{ "float_class_check $dst, $src" %}
25011 ins_encode %{
25012 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25013 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25014 %}
25015 ins_pipe(pipe_slow);
25016 %}
25017
25018 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25019 %{
25020 match(Set dst (IsInfiniteD src));
25021 effect(TEMP ktmp, KILL cr);
25022 format %{ "double_class_check $dst, $src" %}
25023 ins_encode %{
25024 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25025 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25026 %}
25027 ins_pipe(pipe_slow);
25028 %}
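
// The 0x18 immediate passed to vfpclass selects the +Inf (0x08) and -Inf
// (0x10) classes, so $dst is non-zero exactly when $src is infinite.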
25029
25030 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25031 %{
25032 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25033 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25034 match(Set dst (SaturatingAddV src1 src2));
25035 match(Set dst (SaturatingSubV src1 src2));
25036 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25037 ins_encode %{
25038 int vlen_enc = vector_length_encoding(this);
25039 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25040 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25041 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25042 %}
25043 ins_pipe(pipe_slow);
25044 %}
25045
25046 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25047 %{
25048 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25049 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25050 match(Set dst (SaturatingAddV src1 src2));
25051 match(Set dst (SaturatingSubV src1 src2));
25052 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25053 ins_encode %{
25054 int vlen_enc = vector_length_encoding(this);
25055 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25056 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25057 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25058 %}
25059 ins_pipe(pipe_slow);
25060 %}
25061
25062 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25063 %{
25064 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25065 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25066 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25067 match(Set dst (SaturatingAddV src1 src2));
25068 match(Set dst (SaturatingSubV src1 src2));
25069 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25070 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25071 ins_encode %{
25072 int vlen_enc = vector_length_encoding(this);
25073 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25074 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25075 $src1$$XMMRegister, $src2$$XMMRegister,
25076 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25077 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25078 %}
25079 ins_pipe(pipe_slow);
25080 %}
25081
25082 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25083 %{
25084 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25085 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25086 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25087 match(Set dst (SaturatingAddV src1 src2));
25088 match(Set dst (SaturatingSubV src1 src2));
25089 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25090 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25091 ins_encode %{
25092 int vlen_enc = vector_length_encoding(this);
25093 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25094 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25095 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25096 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25097 %}
25098 ins_pipe(pipe_slow);
25099 %}
25100
25101 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25102 %{
25103 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25104 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25105 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25106 match(Set dst (SaturatingAddV src1 src2));
25107 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25108 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25109 ins_encode %{
25110 int vlen_enc = vector_length_encoding(this);
25111 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25112 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25113 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25114 %}
25115 ins_pipe(pipe_slow);
25116 %}
25117
25118 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25119 %{
25120 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25121 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25122 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25123 match(Set dst (SaturatingAddV src1 src2));
25124 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25125 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25126 ins_encode %{
25127 int vlen_enc = vector_length_encoding(this);
25128 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25129 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25130 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25131 %}
25132 ins_pipe(pipe_slow);
25133 %}
25134
25135 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25136 %{
25137 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25138 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25139 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25140 match(Set dst (SaturatingSubV src1 src2));
25141 effect(TEMP ktmp);
25142 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25143 ins_encode %{
25144 int vlen_enc = vector_length_encoding(this);
25145 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25146 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25147 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25148 %}
25149 ins_pipe(pipe_slow);
25150 %}
25151
25152 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25153 %{
25154 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25155 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25156 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25157 match(Set dst (SaturatingSubV src1 src2));
25158 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25159 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25160 ins_encode %{
25161 int vlen_enc = vector_length_encoding(this);
25162 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25163 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25164 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25165 %}
25166 ins_pipe(pipe_slow);
25167 %}
25168
25169 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25170 %{
25171 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25172 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25173 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25174 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25175 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25176 ins_encode %{
25177 int vlen_enc = vector_length_encoding(this);
25178 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25179 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25180 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25181 %}
25182 ins_pipe(pipe_slow);
25183 %}
25184
25185 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25186 %{
25187 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25188 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25189 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25190 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25191 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25192 ins_encode %{
25193 int vlen_enc = vector_length_encoding(this);
25194 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25195 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25196 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25197 %}
25198 ins_pipe(pipe_slow);
25199 %}
25200
25201 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25202 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25203 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25204 match(Set dst (SaturatingAddV (Binary dst src) mask));
25205 match(Set dst (SaturatingSubV (Binary dst src) mask));
25206 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25207 ins_encode %{
25208 int vlen_enc = vector_length_encoding(this);
25209 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25210 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25211 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25212 %}
25213 ins_pipe( pipe_slow );
25214 %}
25215
25216 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25217 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25218 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25219 match(Set dst (SaturatingAddV (Binary dst src) mask));
25220 match(Set dst (SaturatingSubV (Binary dst src) mask));
25221 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25222 ins_encode %{
25223 int vlen_enc = vector_length_encoding(this);
25224 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25225 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25226 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25227 %}
25228 ins_pipe( pipe_slow );
25229 %}
25230
25231 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25232 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25233 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25234 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25235 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25236 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25237 ins_encode %{
25238 int vlen_enc = vector_length_encoding(this);
25239 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25240 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25241 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25242 %}
25243 ins_pipe( pipe_slow );
25244 %}
25245
25246 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25247 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25248 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25249 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25250 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25251 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25252 ins_encode %{
25253 int vlen_enc = vector_length_encoding(this);
25254 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25255 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25256 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25257 %}
25258 ins_pipe( pipe_slow );
25259 %}
25260
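// SelectFromTwoVector treats $src1:$src2 as one concatenated table and picks
// each lane by the corresponding element of $index (a vpermi2-style lookup).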
25261 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25262 %{
25263 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25264 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25265 ins_encode %{
25266 int vlen_enc = vector_length_encoding(this);
25267 BasicType bt = Matcher::vector_element_basic_type(this);
25268 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25269 %}
25270 ins_pipe(pipe_slow);
25271 %}
25272
25273 instruct reinterpretS2HF(regF dst, rRegI src)
25274 %{
25275 match(Set dst (ReinterpretS2HF src));
25276 format %{ "vmovw $dst, $src" %}
25277 ins_encode %{
25278 __ vmovw($dst$$XMMRegister, $src$$Register);
25279 %}
25280 ins_pipe(pipe_slow);
25281 %}
25282
25283 instruct reinterpretHF2S(rRegI dst, regF src)
25284 %{
25285 match(Set dst (ReinterpretHF2S src));
25286 format %{ "vmovw $dst, $src" %}
25287 ins_encode %{
25288 __ vmovw($dst$$Register, $src$$XMMRegister);
25289 %}
25290 ins_pipe(pipe_slow);
25291 %}
25292
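// Fused conversion patterns: when ConvF2HF feeds ReinterpretS2HF directly (or
// ReinterpretHF2S feeds ConvHF2F), the value can stay in an XMM register and
// the round trip through a general-purpose register is avoided.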
25293 instruct convF2HFAndS2HF(regF dst, regF src)
25294 %{
25295 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25296 format %{ "convF2HFAndS2HF $dst, $src" %}
25297 ins_encode %{
25298 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25299 %}
25300 ins_pipe(pipe_slow);
25301 %}
25302
25303 instruct convHF2SAndHF2F(regF dst, regF src)
25304 %{
25305 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25306 format %{ "convHF2SAndHF2F $dst, $src" %}
25307 ins_encode %{
25308 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25309 %}
25310 ins_pipe(pipe_slow);
25311 %}
25312
25313 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25314 %{
25315 match(Set dst (SqrtHF src));
25316 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25317 ins_encode %{
25318 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25319 %}
25320 ins_pipe(pipe_slow);
25321 %}
25322
25323 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25324 %{
25325 match(Set dst (AddHF src1 src2));
25326 match(Set dst (DivHF src1 src2));
25327 match(Set dst (MulHF src1 src2));
25328 match(Set dst (SubHF src1 src2));
25329 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25330 ins_encode %{
25331 int opcode = this->ideal_Opcode();
25332 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25333 %}
25334 ins_pipe(pipe_slow);
25335 %}
25336
25337 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25338 %{
25339 predicate(VM_Version::supports_avx10_2());
25340 match(Set dst (MaxHF src1 src2));
25341 match(Set dst (MinHF src1 src2));
25342 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25343 ins_encode %{
25344 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25345 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25346 %}
25347 ins_pipe( pipe_slow );
25348 %}
25349
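// Without AVX10.2's MINMAX instructions, fp16 min/max is composed from
// compares and blends to honor Java semantics for NaN and +/-0.0; the mask
// and XMM temporaries serve that fix-up.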
25350 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25351 %{
25352 predicate(!VM_Version::supports_avx10_2());
25353 match(Set dst (MaxHF src1 src2));
25354 match(Set dst (MinHF src1 src2));
25355 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25356 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25357 ins_encode %{
25358 int opcode = this->ideal_Opcode();
25359 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25360 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25361 %}
25362 ins_pipe( pipe_slow );
25363 %}
25364
25365 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25366 %{
25367 match(Set dst (FmaHF src2 (Binary dst src1)));
25368 effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma scalarH" %}
25370 ins_encode %{
25371 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25372 %}
25373 ins_pipe( pipe_slow );
25374 %}
25375
25376
25377 instruct vector_sqrt_HF_reg(vec dst, vec src)
25378 %{
25379 match(Set dst (SqrtVHF src));
25380 format %{ "vector_sqrt_fp16 $dst, $src" %}
25381 ins_encode %{
25382 int vlen_enc = vector_length_encoding(this);
25383 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25384 %}
25385 ins_pipe(pipe_slow);
25386 %}
25387
25388 instruct vector_sqrt_HF_mem(vec dst, memory src)
25389 %{
25390 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25391 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25392 ins_encode %{
25393 int vlen_enc = vector_length_encoding(this);
25394 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25395 %}
25396 ins_pipe(pipe_slow);
25397 %}
25398
25399 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25400 %{
25401 match(Set dst (AddVHF src1 src2));
25402 match(Set dst (DivVHF src1 src2));
25403 match(Set dst (MulVHF src1 src2));
25404 match(Set dst (SubVHF src1 src2));
25405 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25406 ins_encode %{
25407 int vlen_enc = vector_length_encoding(this);
25408 int opcode = this->ideal_Opcode();
25409 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25410 %}
25411 ins_pipe(pipe_slow);
25412 %}
25413
25414
25415 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25416 %{
25417 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25418 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25419 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25420 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25421 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25422 ins_encode %{
25423 int vlen_enc = vector_length_encoding(this);
25424 int opcode = this->ideal_Opcode();
25425 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25426 %}
25427 ins_pipe(pipe_slow);
25428 %}
25429
25430 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25431 %{
25432 match(Set dst (FmaVHF src2 (Binary dst src1)));
25433 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25434 ins_encode %{
25435 int vlen_enc = vector_length_encoding(this);
25436 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25437 %}
25438 ins_pipe( pipe_slow );
25439 %}
25440
25441 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25442 %{
25443 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25444 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25445 ins_encode %{
25446 int vlen_enc = vector_length_encoding(this);
25447 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25448 %}
25449 ins_pipe( pipe_slow );
25450 %}
25451
25452 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25453 %{
25454 predicate(VM_Version::supports_avx10_2());
25455 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25456 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25457 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25458 ins_encode %{
25459 int vlen_enc = vector_length_encoding(this);
25460 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25461 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25462 %}
25463 ins_pipe( pipe_slow );
25464 %}
25465
25466 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25467 %{
25468 predicate(VM_Version::supports_avx10_2());
25469 match(Set dst (MinVHF src1 src2));
25470 match(Set dst (MaxVHF src1 src2));
25471 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25472 ins_encode %{
25473 int vlen_enc = vector_length_encoding(this);
25474 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25475 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25476 %}
25477 ins_pipe( pipe_slow );
25478 %}
25479
25480 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25481 %{
25482 predicate(!VM_Version::supports_avx10_2());
25483 match(Set dst (MinVHF src1 src2));
25484 match(Set dst (MaxVHF src1 src2));
25485 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25486 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25487 ins_encode %{
25488 int vlen_enc = vector_length_encoding(this);
25489 int opcode = this->ideal_Opcode();
25490 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25491 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25492 %}
25493 ins_pipe( pipe_slow );
25494 %}
25495
25496 //----------PEEPHOLE RULES-----------------------------------------------------
25497 // These must follow all instruction definitions as they use the names
25498 // defined in the instructions definitions.
25499 //
25500 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25502 //
25503 // peepmatch ( root_instr_name [preceding_instruction]* );
25504 //
25505 // peepprocedure ( procedure_name );
// // provide the name of a procedure to perform the optimization; the procedure
// // should reside in the architecture-dependent peephole file and have a
// // signature of the form
// //   bool (Block*, int, PhaseRegAlloc*, MachNode* (*)(), int...)
// // where the arguments are the basic block, the index of the current node
// // inside the block, the register allocator, a function that when invoked
// // creates the new node defined in peepreplace, and the rule numbers of the
// // nodes appearing in the corresponding peepmatch; the procedure returns
// // true if the transformation succeeded, and false otherwise
25514 //
25515 // peepconstraint %{
25516 // (instruction_number.operand_name relational_op instruction_number.operand_name
25517 // [, ...] );
25518 // // instruction numbers are zero-based using left to right order in peepmatch
25519 //
25520 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25521 // // provide an instruction_number.operand_name for each operand that appears
25522 // // in the replacement instruction's match rule
25523 //
25524 // ---------VM FLAGS---------------------------------------------------------
25525 //
25526 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25527 //
25528 // Each peephole rule is given an identifying number starting with zero and
25529 // increasing by one in the order seen by the parser. An individual peephole
25530 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25531 // on the command-line.
25532 //
25533 // ---------CURRENT LIMITATIONS----------------------------------------------
25534 //
// Only transformations inside a basic block (do we need more for peephole?)
25536 //
25537 // ---------EXAMPLE----------------------------------------------------------
25538 //
25539 // // pertinent parts of existing instructions in architecture description
25540 // instruct movI(rRegI dst, rRegI src)
25541 // %{
25542 // match(Set dst (CopyI src));
25543 // %}
25544 //
25545 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25546 // %{
25547 // match(Set dst (AddI dst src));
25548 // effect(KILL cr);
25549 // %}
25550 //
25551 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25552 // %{
25553 // match(Set dst (AddI dst src));
25554 // %}
25555 //
25556 // 1. Simple replacement
25557 // - Only match adjacent instructions in same basic block
25558 // - Only equality constraints
25559 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25560 // - Only one replacement instruction
25561 //
25562 // // Change (inc mov) to lea
25563 // peephole %{
25564 // // lea should only be emitted when beneficial
25565 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25566 // // increment preceded by register-register move
25567 // peepmatch ( incI_rReg movI );
25568 // // require that the destination register of the increment
25569 // // match the destination register of the move
25570 // peepconstraint ( 0.dst == 1.dst );
25571 // // construct a replacement instruction that sets
25572 // // the destination to ( move's source register + one )
25573 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25574 // %}
25575 //
25576 // 2. Procedural replacement
// - More flexible at finding relevant nodes
25578 // - More flexible constraints
25579 // - More flexible transformations
// - May utilise the architecture-dependent API more effectively
// - Currently only one replacement instruction, due to adlc parsing limitations
25582 //
25583 // // Change (inc mov) to lea
25584 // peephole %{
25585 // // lea should only be emitted when beneficial
25586 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the matched nodes are passed into the procedure below
// peepmatch ( incI_rReg movI );
// // the procedure that performs the transformation
// peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
// // this node is passed into the procedure above
// peepreplace ( leaI_rReg_immI() );
25594 // %}
25595
// These instructions are not matched by the matcher but are used by the peephole rules
25597 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25598 %{
25599 predicate(false);
25600 match(Set dst (AddI src1 src2));
25601 format %{ "leal $dst, [$src1 + $src2]" %}
25602 ins_encode %{
25603 Register dst = $dst$$Register;
25604 Register src1 = $src1$$Register;
25605 Register src2 = $src2$$Register;
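    // rbp and r13 cannot be encoded as a base register without a displacement
    // byte, so use the other operand as the base when possible.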
25606 if (src1 != rbp && src1 != r13) {
25607 __ leal(dst, Address(src1, src2, Address::times_1));
25608 } else {
25609 assert(src2 != rbp && src2 != r13, "");
25610 __ leal(dst, Address(src2, src1, Address::times_1));
25611 }
25612 %}
25613 ins_pipe(ialu_reg_reg);
25614 %}
25615
25616 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25617 %{
25618 predicate(false);
25619 match(Set dst (AddI src1 src2));
25620 format %{ "leal $dst, [$src1 + $src2]" %}
25621 ins_encode %{
25622 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25623 %}
25624 ins_pipe(ialu_reg_reg);
25625 %}
25626
25627 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25628 %{
25629 predicate(false);
25630 match(Set dst (LShiftI src shift));
25631 format %{ "leal $dst, [$src << $shift]" %}
25632 ins_encode %{
25633 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25634 Register src = $src$$Register;
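    // A scale-only address (no base register) always requires a 32-bit
    // displacement, so for a shift by 1 the shorter [src + src] form is
    // preferred when src is usable as a base.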
25635 if (scale == Address::times_2 && src != rbp && src != r13) {
25636 __ leal($dst$$Register, Address(src, src, Address::times_1));
25637 } else {
25638 __ leal($dst$$Register, Address(noreg, src, scale));
25639 }
25640 %}
25641 ins_pipe(ialu_reg_reg);
25642 %}
25643
25644 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25645 %{
25646 predicate(false);
25647 match(Set dst (AddL src1 src2));
25648 format %{ "leaq $dst, [$src1 + $src2]" %}
25649 ins_encode %{
25650 Register dst = $dst$$Register;
25651 Register src1 = $src1$$Register;
25652 Register src2 = $src2$$Register;
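    // As in the 32-bit variant above: avoid rbp/r13 as the base register,
    // which would require an extra displacement byte.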
25653 if (src1 != rbp && src1 != r13) {
25654 __ leaq(dst, Address(src1, src2, Address::times_1));
25655 } else {
25656 assert(src2 != rbp && src2 != r13, "");
25657 __ leaq(dst, Address(src2, src1, Address::times_1));
25658 }
25659 %}
25660 ins_pipe(ialu_reg_reg);
25661 %}
25662
25663 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25664 %{
25665 predicate(false);
25666 match(Set dst (AddL src1 src2));
25667 format %{ "leaq $dst, [$src1 + $src2]" %}
25668 ins_encode %{
25669 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25670 %}
25671 ins_pipe(ialu_reg_reg);
25672 %}
25673
25674 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25675 %{
25676 predicate(false);
25677 match(Set dst (LShiftL src shift));
25678 format %{ "leaq $dst, [$src << $shift]" %}
25679 ins_encode %{
25680 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25681 Register src = $src$$Register;
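    // As in the 32-bit variant above: prefer [src + src] over a scale-only
    // address, which would require a 32-bit displacement.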
25682 if (scale == Address::times_2 && src != rbp && src != r13) {
25683 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25684 } else {
25685 __ leaq($dst$$Register, Address(noreg, src, scale));
25686 }
25687 %}
25688 ins_pipe(ialu_reg_reg);
25689 %}
25690
25691 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25692 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25693 // processors with at least partial ALU support for lea
25694 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25695 // beneficial for processors with full ALU support
25696 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
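//
// For example (illustrative), the pair
//   movl rdx, rsi
//   addl rdx, rcx
// is coalesced by the first rule below into the single instruction
//   leal rdx, [rsi + rcx]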
25697
25698 peephole
25699 %{
25700 peeppredicate(VM_Version::supports_fast_2op_lea());
25701 peepmatch (addI_rReg);
25702 peepprocedure (lea_coalesce_reg);
25703 peepreplace (leaI_rReg_rReg_peep());
25704 %}
25705
25706 peephole
25707 %{
25708 peeppredicate(VM_Version::supports_fast_2op_lea());
25709 peepmatch (addI_rReg_imm);
25710 peepprocedure (lea_coalesce_imm);
25711 peepreplace (leaI_rReg_immI_peep());
25712 %}
25713
25714 peephole
25715 %{
25716 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25717 VM_Version::is_intel_cascade_lake());
25718 peepmatch (incI_rReg);
25719 peepprocedure (lea_coalesce_imm);
25720 peepreplace (leaI_rReg_immI_peep());
25721 %}
25722
25723 peephole
25724 %{
25725 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25726 VM_Version::is_intel_cascade_lake());
25727 peepmatch (decI_rReg);
25728 peepprocedure (lea_coalesce_imm);
25729 peepreplace (leaI_rReg_immI_peep());
25730 %}
25731
25732 peephole
25733 %{
25734 peeppredicate(VM_Version::supports_fast_2op_lea());
25735 peepmatch (salI_rReg_immI2);
25736 peepprocedure (lea_coalesce_imm);
25737 peepreplace (leaI_rReg_immI2_peep());
25738 %}
25739
25740 peephole
25741 %{
25742 peeppredicate(VM_Version::supports_fast_2op_lea());
25743 peepmatch (addL_rReg);
25744 peepprocedure (lea_coalesce_reg);
25745 peepreplace (leaL_rReg_rReg_peep());
25746 %}
25747
25748 peephole
25749 %{
25750 peeppredicate(VM_Version::supports_fast_2op_lea());
25751 peepmatch (addL_rReg_imm);
25752 peepprocedure (lea_coalesce_imm);
25753 peepreplace (leaL_rReg_immL32_peep());
25754 %}
25755
25756 peephole
25757 %{
25758 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25759 VM_Version::is_intel_cascade_lake());
25760 peepmatch (incL_rReg);
25761 peepprocedure (lea_coalesce_imm);
25762 peepreplace (leaL_rReg_immL32_peep());
25763 %}
25764
25765 peephole
25766 %{
25767 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25768 VM_Version::is_intel_cascade_lake());
25769 peepmatch (decL_rReg);
25770 peepprocedure (lea_coalesce_imm);
25771 peepreplace (leaL_rReg_immL32_peep());
25772 %}
25773
25774 peephole
25775 %{
25776 peeppredicate(VM_Version::supports_fast_2op_lea());
25777 peepmatch (salL_rReg_immI2);
25778 peepprocedure (lea_coalesce_imm);
25779 peepreplace (leaL_rReg_immI2_peep());
25780 %}
25781
25782 peephole
25783 %{
25784 peepmatch (leaPCompressedOopOffset);
25785 peepprocedure (lea_remove_redundant);
25786 %}
25787
25788 peephole
25789 %{
25790 peepmatch (leaP8Narrow);
25791 peepprocedure (lea_remove_redundant);
25792 %}
25793
25794 peephole
25795 %{
25796 peepmatch (leaP32Narrow);
25797 peepprocedure (lea_remove_redundant);
25798 %}
25799
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only
// use flags that are already set by the previous instruction.
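//
// For example (illustrative):
//   andl  rax, rbx
//   testl rax, rax   ; redundant: andl already set ZF/SF on the same value
//   jne   done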
25802
// int variant
25804 peephole
25805 %{
25806 peepmatch (testI_reg);
25807 peepprocedure (test_may_remove);
25808 %}
25809
// long variant
25811 peephole
25812 %{
25813 peepmatch (testL_reg);
25814 peepprocedure (test_may_remove);
25815 %}
25816
25817
25818 //----------SMARTSPILL RULES---------------------------------------------------
25819 // These must follow all instruction definitions as they use the names
25820 // defined in the instructions definitions.