//
// Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
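// For example, in the definitions below RAX is save-on-call under both the
// Java and C conventions, is spilled/reloaded as an integer (Op_RegI), and
// carries hardware encoding 0.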

// General Registers
// R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously RBX, RSI, and RDI were set as save-on-entry for Java code,
// but SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, RSI and RDI are turned back on as SOE registers.

reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());

reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());

reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());

reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());

reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());

// Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());

#else

reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());

reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());

#endif

reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());

reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());

reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());

reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());

reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());

reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());

reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());

reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());

reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());

reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());

reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());

reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());

reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());

reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());

reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());

reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());

reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());

reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry. Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.
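// The chunk0 ordering below follows this heuristic: freely usable
// save-on-call scratch registers such as R10 and R11 come first, and the
// fixed stack pointer RSP comes last.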

alloc_class chunk0(R10, R10_H,
                   R11, R11_H,
                   R8, R8_H,
                   R9, R9_H,
                   R12, R12_H,
                   RCX, RCX_H,
                   RBX, RBX_H,
                   RDI, RDI_H,
                   RDX, RDX_H,
                   RSI, RSI_H,
                   RAX, RAX_H,
                   RBP, RBP_H,
                   R13, R13_H,
                   R14, R14_H,
                   R15, R15_H,
                   R16, R16_H,
                   R17, R17_H,
                   R18, R18_H,
                   R19, R19_H,
                   R20, R20_H,
                   R21, R21_H,
                   R22, R22_H,
                   R23, R23_H,
                   R24, R24_H,
                   R25, R25_H,
                   R26, R26_H,
                   R27, R27_H,
                   R28, R28_H,
                   R29, R29_H,
                   R30, R30_H,
                   R31, R31_H,
                   RSP, RSP_H);

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());

// AVX3 Mask Registers.
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers including APX extended GPRs.
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8, R8_H,
                  R9, R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H,
                  R16, R16_H,
                  R17, R17_H,
                  R18, R18_H,
                  R19, R19_H,
                  R20, R20_H,
                  R21, R21_H,
                  R22, R22_H,
                  R23, R23_H,
                  R24, R24_H,
                  R25, R25_H,
                  R26, R26_H,
                  R27, R27_H,
                  R28, R28_H,
                  R29, R29_H,
                  R30, R30_H,
                  R31, R31_H);

// Class for all int registers including APX extended GPRs.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for R11 long register
reg_class long_r11_reg(R11, R11_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RDX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                   XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                   XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

1031 alloc_class chunk2(K7, K7_H,
1032 K6, K6_H,
1033 K5, K5_H,
1034 K4, K4_H,
1035 K3, K3_H,
1036 K2, K2_H,
1037 K1, K1_H);
1038
1039 reg_class vectmask_reg(K1, K1_H,
1040 K2, K2_H,
1041 K3, K3_H,
1042 K4, K4_H,
1043 K5, K5_H,
1044 K6, K6_H,
1045 K7, K7_H);
1046
1047 reg_class vectmask_reg_K1(K1, K1_H);
1048 reg_class vectmask_reg_K2(K2, K2_H);
1049 reg_class vectmask_reg_K3(K3, K3_H);
1050 reg_class vectmask_reg_K4(K4, K4_H);
1051 reg_class vectmask_reg_K5(K5, K5_H);
1052 reg_class vectmask_reg_K6(K6, K6_H);
1053 reg_class vectmask_reg_K7(K7, K7_H);
1054
1055 // flags allocation class should be last.
1056 alloc_class chunk3(RFLAGS);
1057
1058 // Singleton class for condition codes
1059 reg_class int_flags(RFLAGS);
1060
1061 // Class for pre evex float registers
1062 reg_class float_reg_legacy(XMM0,
1063 XMM1,
1064 XMM2,
1065 XMM3,
1066 XMM4,
1067 XMM5,
1068 XMM6,
1069 XMM7,
1070 XMM8,
1071 XMM9,
1072 XMM10,
1073 XMM11,
1074 XMM12,
1075 XMM13,
1076 XMM14,
1077 XMM15);
1078
1079 // Class for evex float registers
1080 reg_class float_reg_evex(XMM0,
1081 XMM1,
1082 XMM2,
1083 XMM3,
1084 XMM4,
1085 XMM5,
1086 XMM6,
1087 XMM7,
1088 XMM8,
1089 XMM9,
1090 XMM10,
1091 XMM11,
1092 XMM12,
1093 XMM13,
1094 XMM14,
1095 XMM15,
1096 XMM16,
1097 XMM17,
1098 XMM18,
1099 XMM19,
1100 XMM20,
1101 XMM21,
1102 XMM22,
1103 XMM23,
1104 XMM24,
1105 XMM25,
1106 XMM26,
1107 XMM27,
1108 XMM28,
1109 XMM29,
1110 XMM30,
1111 XMM31);
1112
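// reg_class_dynamic selects between its two underlying classes when the
// register masks are set up: the first (EVEX, XMM0-XMM31) class when the
// predicate holds, otherwise the legacy XMM0-XMM15 subset.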
1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
1116 // Class for pre evex double registers
1117 reg_class double_reg_legacy(XMM0, XMM0b,
1118 XMM1, XMM1b,
1119 XMM2, XMM2b,
1120 XMM3, XMM3b,
1121 XMM4, XMM4b,
1122 XMM5, XMM5b,
1123 XMM6, XMM6b,
1124 XMM7, XMM7b,
1125 XMM8, XMM8b,
1126 XMM9, XMM9b,
1127 XMM10, XMM10b,
1128 XMM11, XMM11b,
1129 XMM12, XMM12b,
1130 XMM13, XMM13b,
1131 XMM14, XMM14b,
1132 XMM15, XMM15b);
1133
1134 // Class for evex double registers
1135 reg_class double_reg_evex(XMM0, XMM0b,
1136 XMM1, XMM1b,
1137 XMM2, XMM2b,
1138 XMM3, XMM3b,
1139 XMM4, XMM4b,
1140 XMM5, XMM5b,
1141 XMM6, XMM6b,
1142 XMM7, XMM7b,
1143 XMM8, XMM8b,
1144 XMM9, XMM9b,
1145 XMM10, XMM10b,
1146 XMM11, XMM11b,
1147 XMM12, XMM12b,
1148 XMM13, XMM13b,
1149 XMM14, XMM14b,
1150 XMM15, XMM15b,
1151 XMM16, XMM16b,
1152 XMM17, XMM17b,
1153 XMM18, XMM18b,
1154 XMM19, XMM19b,
1155 XMM20, XMM20b,
1156 XMM21, XMM21b,
1157 XMM22, XMM22b,
1158 XMM23, XMM23b,
1159 XMM24, XMM24b,
1160 XMM25, XMM25b,
1161 XMM26, XMM26b,
1162 XMM27, XMM27b,
1163 XMM28, XMM28b,
1164 XMM29, XMM29b,
1165 XMM30, XMM30b,
1166 XMM31, XMM31b);
1167
1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
1171 // Class for pre evex 32bit vector registers
1172 reg_class vectors_reg_legacy(XMM0,
1173 XMM1,
1174 XMM2,
1175 XMM3,
1176 XMM4,
1177 XMM5,
1178 XMM6,
1179 XMM7,
1180 XMM8,
1181 XMM9,
1182 XMM10,
1183 XMM11,
1184 XMM12,
1185 XMM13,
1186 XMM14,
1187 XMM15);
1188
1189 // Class for evex 32bit vector registers
1190 reg_class vectors_reg_evex(XMM0,
1191 XMM1,
1192 XMM2,
1193 XMM3,
1194 XMM4,
1195 XMM5,
1196 XMM6,
1197 XMM7,
1198 XMM8,
1199 XMM9,
1200 XMM10,
1201 XMM11,
1202 XMM12,
1203 XMM13,
1204 XMM14,
1205 XMM15,
1206 XMM16,
1207 XMM17,
1208 XMM18,
1209 XMM19,
1210 XMM20,
1211 XMM21,
1212 XMM22,
1213 XMM23,
1214 XMM24,
1215 XMM25,
1216 XMM26,
1217 XMM27,
1218 XMM28,
1219 XMM29,
1220 XMM30,
1221 XMM31);
1222
1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers
1227 reg_class vectord_reg_legacy(XMM0, XMM0b,
1228 XMM1, XMM1b,
1229 XMM2, XMM2b,
1230 XMM3, XMM3b,
1231 XMM4, XMM4b,
1232 XMM5, XMM5b,
1233 XMM6, XMM6b,
1234 XMM7, XMM7b,
1235 XMM8, XMM8b,
1236 XMM9, XMM9b,
1237 XMM10, XMM10b,
1238 XMM11, XMM11b,
1239 XMM12, XMM12b,
1240 XMM13, XMM13b,
1241 XMM14, XMM14b,
1242 XMM15, XMM15b);
1243
// Class for evex 64bit vector registers
1245 reg_class vectord_reg_evex(XMM0, XMM0b,
1246 XMM1, XMM1b,
1247 XMM2, XMM2b,
1248 XMM3, XMM3b,
1249 XMM4, XMM4b,
1250 XMM5, XMM5b,
1251 XMM6, XMM6b,
1252 XMM7, XMM7b,
1253 XMM8, XMM8b,
1254 XMM9, XMM9b,
1255 XMM10, XMM10b,
1256 XMM11, XMM11b,
1257 XMM12, XMM12b,
1258 XMM13, XMM13b,
1259 XMM14, XMM14b,
1260 XMM15, XMM15b,
1261 XMM16, XMM16b,
1262 XMM17, XMM17b,
1263 XMM18, XMM18b,
1264 XMM19, XMM19b,
1265 XMM20, XMM20b,
1266 XMM21, XMM21b,
1267 XMM22, XMM22b,
1268 XMM23, XMM23b,
1269 XMM24, XMM24b,
1270 XMM25, XMM25b,
1271 XMM26, XMM26b,
1272 XMM27, XMM27b,
1273 XMM28, XMM28b,
1274 XMM29, XMM29b,
1275 XMM30, XMM30b,
1276 XMM31, XMM31b);
1277
1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers
1282 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
1283 XMM1, XMM1b, XMM1c, XMM1d,
1284 XMM2, XMM2b, XMM2c, XMM2d,
1285 XMM3, XMM3b, XMM3c, XMM3d,
1286 XMM4, XMM4b, XMM4c, XMM4d,
1287 XMM5, XMM5b, XMM5c, XMM5d,
1288 XMM6, XMM6b, XMM6c, XMM6d,
1289 XMM7, XMM7b, XMM7c, XMM7d,
1290 XMM8, XMM8b, XMM8c, XMM8d,
1291 XMM9, XMM9b, XMM9c, XMM9d,
1292 XMM10, XMM10b, XMM10c, XMM10d,
1293 XMM11, XMM11b, XMM11c, XMM11d,
1294 XMM12, XMM12b, XMM12c, XMM12d,
1295 XMM13, XMM13b, XMM13c, XMM13d,
1296 XMM14, XMM14b, XMM14c, XMM14d,
1297 XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers
1300 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
1301 XMM1, XMM1b, XMM1c, XMM1d,
1302 XMM2, XMM2b, XMM2c, XMM2d,
1303 XMM3, XMM3b, XMM3c, XMM3d,
1304 XMM4, XMM4b, XMM4c, XMM4d,
1305 XMM5, XMM5b, XMM5c, XMM5d,
1306 XMM6, XMM6b, XMM6c, XMM6d,
1307 XMM7, XMM7b, XMM7c, XMM7d,
1308 XMM8, XMM8b, XMM8c, XMM8d,
1309 XMM9, XMM9b, XMM9c, XMM9d,
1310 XMM10, XMM10b, XMM10c, XMM10d,
1311 XMM11, XMM11b, XMM11c, XMM11d,
1312 XMM12, XMM12b, XMM12c, XMM12d,
1313 XMM13, XMM13b, XMM13c, XMM13d,
1314 XMM14, XMM14b, XMM14c, XMM14d,
1315 XMM15, XMM15b, XMM15c, XMM15d,
1316 XMM16, XMM16b, XMM16c, XMM16d,
1317 XMM17, XMM17b, XMM17c, XMM17d,
1318 XMM18, XMM18b, XMM18c, XMM18d,
1319 XMM19, XMM19b, XMM19c, XMM19d,
1320 XMM20, XMM20b, XMM20c, XMM20d,
1321 XMM21, XMM21b, XMM21c, XMM21d,
1322 XMM22, XMM22b, XMM22c, XMM22d,
1323 XMM23, XMM23b, XMM23c, XMM23d,
1324 XMM24, XMM24b, XMM24c, XMM24d,
1325 XMM25, XMM25b, XMM25c, XMM25d,
1326 XMM26, XMM26b, XMM26c, XMM26d,
1327 XMM27, XMM27b, XMM27c, XMM27d,
1328 XMM28, XMM28b, XMM28c, XMM28d,
1329 XMM29, XMM29b, XMM29c, XMM29d,
1330 XMM30, XMM30b, XMM30c, XMM30d,
1331 XMM31, XMM31b, XMM31c, XMM31d);
1332
1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers
1337 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1338 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1339 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1340 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1341 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1342 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1343 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1344 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1345 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1346 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1347 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1348 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1349 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1350 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1351 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1352 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for evex 256bit vector registers
1355 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
1356 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
1357 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
1358 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
1359 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
1360 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
1361 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
1362 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
1363 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
1364 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
1365 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
1366 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
1367 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
1368 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
1369 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
1370 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
1371 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
1372 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
1373 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
1374 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
1375 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
1376 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
1377 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
1378 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
1379 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
1380 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
1381 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
1382 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
1383 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
1384 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
1385 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
1386 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
1391 // Class for all 512bit vector registers
1392 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1393 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1394 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1395 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1396 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1397 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1398 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1399 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1400 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1401 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1402 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1403 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1404 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1405 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1406 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1407 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
1408 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
1409 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
1410 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
1411 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
1412 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
1413 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
1414 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
1415 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
1416 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
1417 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
1418 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
1419 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
1420 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
1421 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
1422 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
1423 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
1425 // Class for restricted 512bit vector registers
1426 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
1427 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
1428 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
1429 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
1430 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
1431 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
1432 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
1433 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
1434 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
1435 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
1436 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
1437 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
1438 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
1439 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
1440 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
1441 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
1443 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
1455 source_hpp %{
1456
1457 #include "peephole_x86_64.hpp"
1458
1459 bool castLL_is_imm32(const Node* n);
1460
1461 %}
1462
1463 source %{
1464
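// Returns true when each bound of the CastLL's long type is either unbounded
// (min_jlong/max_jlong) or fits in a signed 32-bit immediate, so the bounds
// can be checked with simm32 compares. For example, a type of [0, 1000]
// qualifies, while a bound of 0x100000000 does not.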
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
1474 source_hpp %{
1475
1476 extern RegMask _ANY_REG_mask;
1477 extern RegMask _PTR_REG_mask;
1478 extern RegMask _PTR_REG_NO_RBP_mask;
1479 extern RegMask _PTR_NO_RAX_REG_mask;
1480 extern RegMask _PTR_NO_RAX_RBX_REG_mask;
1481 extern RegMask _LONG_REG_mask;
1482 extern RegMask _LONG_NO_RAX_RDX_REG_mask;
1483 extern RegMask _LONG_NO_RCX_REG_mask;
1484 extern RegMask _LONG_NO_RBP_R13_REG_mask;
1485 extern RegMask _INT_REG_mask;
1486 extern RegMask _INT_NO_RAX_RDX_REG_mask;
1487 extern RegMask _INT_NO_RCX_REG_mask;
1488 extern RegMask _INT_NO_RBP_R13_REG_mask;
1489 extern RegMask _FLOAT_REG_mask;
1490
1491 extern RegMask _STACK_OR_PTR_REG_mask;
1492 extern RegMask _STACK_OR_LONG_REG_mask;
1493 extern RegMask _STACK_OR_INT_REG_mask;
1494
1495 inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
1496 inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
1497 inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }
1498
1499 %}
1500
1501 source %{
1502 #define RELOC_IMM64 Assembler::imm_operand
1503 #define RELOC_DISP32 Assembler::disp32_operand
1504
1505 #define __ masm->
1506
1507 RegMask _ANY_REG_mask;
1508 RegMask _PTR_REG_mask;
1509 RegMask _PTR_REG_NO_RBP_mask;
1510 RegMask _PTR_NO_RAX_REG_mask;
1511 RegMask _PTR_NO_RAX_RBX_REG_mask;
1512 RegMask _LONG_REG_mask;
1513 RegMask _LONG_NO_RAX_RDX_REG_mask;
1514 RegMask _LONG_NO_RCX_REG_mask;
1515 RegMask _LONG_NO_RBP_R13_REG_mask;
1516 RegMask _INT_REG_mask;
1517 RegMask _INT_NO_RAX_RDX_REG_mask;
1518 RegMask _INT_NO_RCX_REG_mask;
1519 RegMask _INT_NO_RBP_R13_REG_mask;
1520 RegMask _FLOAT_REG_mask;
1521 RegMask _STACK_OR_PTR_REG_mask;
1522 RegMask _STACK_OR_LONG_REG_mask;
1523 RegMask _STACK_OR_INT_REG_mask;
1524
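// r12 doubles as the compressed oops heap base register, so it must be kept
// out of the allocatable masks whenever compressed oops are in use.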
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
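  // rsp is the stack pointer and r15 is reserved for the current JavaThread,
  // so neither may hold a pointer value.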
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
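  // Without Intel APX the extended general purpose registers r16-r31 cannot
  // be encoded, so strip them from the allocatable set.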
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
static bool generate_vzeroupper(Compile* C) {
  return VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx()); // Generate vzeroupper
}
1629
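// vzeroupper is a single 3-byte instruction; that is the size accounted for
// below whenever it will be emitted.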
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
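  // (A direct call is a single call rel32: one opcode byte plus a 4-byte displacement.)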
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
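  // (The 15 bytes cover the 10-byte movq immediate that loads the inline
  //  cache metadata into rax, followed by the 5-byte call.)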
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
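// Materializes the three-way compare result in dst from flags set by a
// preceding ucomiss/ucomisd: -1 if less than or unordered (NaN), 0 if equal,
// 1 if greater.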
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  // Any floating point comparison sets CF=1 for the unordered case (at least
  // one input is NaN), so the 'below' branch is also taken for NaN inputs.
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
1711 // Math.min() # Math.max()
1712 // --------------------------
1713 // ucomis[s/d] #
1714 // ja -> b # a
1715 // jp -> NaN # NaN
1716 // jb -> a # b
1717 // je #
1718 // |-jz -> a | b # a & b
1719 // | -> a #
1720 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1721 XMMRegister a, XMMRegister b,
1722 XMMRegister xmmt, Register rt,
1723 bool min, bool single) {
1724
1725 Label nan, zero, below, above, done;
1726
1727 if (single)
1728 __ ucomiss(a, b);
1729 else
1730 __ ucomisd(a, b);
1731
1732 if (dst->encoding() != (min ? b : a)->encoding())
1733 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1734 else
1735 __ jccb(Assembler::above, done);
1736
1737 __ jccb(Assembler::parity, nan); // PF=1
1738 __ jccb(Assembler::below, below); // CF=1
1739
1740 // equal
1741 __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
1742 if (single) {
1743 __ ucomiss(a, xmmt);
1744 __ jccb(Assembler::equal, zero);
1745
1746 __ movflt(dst, a);
1747 __ jmp(done);
1748 }
1749 else {
1750 __ ucomisd(a, xmmt);
1751 __ jccb(Assembler::equal, zero);
1752
1753 __ movdbl(dst, a);
1754 __ jmp(done);
1755 }
1756
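  // a == b and a == 0.0 here: combine the raw sign bits so that
  // Math.min(-0.0, +0.0) yields -0.0 (or) and Math.max yields +0.0 (and).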
1757 __ bind(zero);
1758 if (min)
1759 __ vpor(dst, a, b, Assembler::AVX_128bit);
1760 else
1761 __ vpand(dst, a, b, Assembler::AVX_128bit);
1762
1763 __ jmp(done);
1764
1765 __ bind(above);
1766 if (single)
1767 __ movflt(dst, min ? b : a);
1768 else
1769 __ movdbl(dst, min ? b : a);
1770
1771 __ jmp(done);
1772
1773 __ bind(nan);
1774 if (single) {
1775 __ movl(rt, 0x7fc00000); // Float.NaN
1776 __ movdl(dst, rt);
1777 }
1778 else {
1779 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1780 __ movdq(dst, rt);
1781 }
1782 __ jmp(done);
1783
1784 __ bind(below);
1785 if (single)
1786 __ movflt(dst, min ? a : b);
1787 else
1788 __ movdbl(dst, min ? a : b);
1789
1790 __ bind(done);
1791 }
1792
1793 //=============================================================================
1794 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1795
1796 int ConstantTable::calculate_table_base_offset() const {
1797 return 0; // absolute addressing, no offset
1798 }
1799
1800 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1801 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1802 ShouldNotReachHere();
1803 }
1804
1805 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1806 // Empty encoding
1807 }
1808
1809 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1810 return 0;
1811 }
1812
1813 #ifndef PRODUCT
1814 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1815 st->print("# MachConstantBaseNode (empty encoding)");
1816 }
1817 #endif
1818
1819
1820 //=============================================================================
1821 #ifndef PRODUCT
1822 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1823 Compile* C = ra_->C;
1824
1825 int framesize = C->output()->frame_size_in_bytes();
1826 int bangsize = C->output()->bang_size_in_bytes();
1827 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1828 // Remove wordSize for return addr which is already pushed.
1829 framesize -= wordSize;
1830
1831 if (C->output()->need_stack_bang(bangsize)) {
1832 framesize -= wordSize;
1833 st->print("# stack bang (%d bytes)", bangsize);
1834 st->print("\n\t");
1835 st->print("pushq rbp\t# Save rbp");
1836 if (PreserveFramePointer) {
1837 st->print("\n\t");
1838 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1839 }
1840 if (framesize) {
1841 st->print("\n\t");
1842 st->print("subq rsp, #%d\t# Create frame",framesize);
1843 }
1844 } else {
1845 st->print("subq rsp, #%d\t# Create frame",framesize);
1846 st->print("\n\t");
1847 framesize -= wordSize;
1848 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1849 if (PreserveFramePointer) {
1850 st->print("\n\t");
1851 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1852 if (framesize > 0) {
1853 st->print("\n\t");
1854 st->print("addq rbp, #%d", framesize);
1855 }
1856 }
1857 }
1858
1859 if (VerifyStackAtCalls) {
1860 st->print("\n\t");
1861 framesize -= wordSize;
1862 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1863 #ifdef ASSERT
1864 st->print("\n\t");
1865 st->print("# stack alignment check");
1866 #endif
1867 }
1868 if (C->stub_function() != nullptr) {
1869 st->print("\n\t");
1870 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1871 st->print("\n\t");
1872 st->print("je fast_entry\t");
1873 st->print("\n\t");
1874 st->print("call #nmethod_entry_barrier_stub\t");
1875 st->print("\n\tfast_entry:");
1876 }
1877 st->cr();
1878 }
1879 #endif
1880
1881 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1882 Compile* C = ra_->C;
1883
1884 int framesize = C->output()->frame_size_in_bytes();
1885 int bangsize = C->output()->bang_size_in_bytes();
1886
1887 if (C->clinit_barrier_on_entry()) {
1888 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1889 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1890
1891 Label L_skip_barrier;
1892 Register klass = rscratch1;
1893
1894 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1895 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1896
1897 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1898
1899 __ bind(L_skip_barrier);
1900 }
1901
1902 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1903
1904 C->output()->set_frame_complete(__ offset());
1905
1906 if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because code that uses the
    // constant table might be emitted before MachConstantBaseNode.
1909 ConstantTable& constant_table = C->output()->constant_table();
1910 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1911 }
1912 }
1913
1914 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1915 {
1916 return MachNode::size(ra_); // too many variables; just compute it
1917 // the hard way
1918 }
1919
1920 int MachPrologNode::reloc() const
1921 {
1922 return 0; // a large enough number
1923 }
1924
1925 //=============================================================================
1926 #ifndef PRODUCT
1927 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1928 {
1929 Compile* C = ra_->C;
1930 if (generate_vzeroupper(C)) {
1931 st->print("vzeroupper");
1932 st->cr(); st->print("\t");
1933 }
1934
1935 int framesize = C->output()->frame_size_in_bytes();
1936 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1937 // Remove word for return adr already pushed
1938 // and RBP
1939 framesize -= 2*wordSize;
1940
1941 if (framesize) {
1942 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1943 st->print("\t");
1944 }
1945
1946 st->print_cr("popq rbp");
1947 if (do_polling() && C->is_method_compilation()) {
1948 st->print("\t");
1949 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1950 "ja #safepoint_stub\t"
1951 "# Safepoint: poll for GC");
1952 }
1953 }
1954 #endif
1955
1956 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1957 {
1958 Compile* C = ra_->C;
1959
1960 if (generate_vzeroupper(C)) {
1961 // Clear upper bits of YMM registers when current compiled code uses
1962 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1963 __ vzeroupper();
1964 }
1965
1966 int framesize = C->output()->frame_size_in_bytes();
1967 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1968 // Remove word for return adr already pushed
1969 // and RBP
1970 framesize -= 2*wordSize;
1971
1972 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1973
1974 if (framesize) {
1975 __ addq(rsp, framesize);
1976 }
1977
1978 __ popq(rbp);
1979
1980 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1981 __ reserved_stack_check();
1982 }
1983
1984 if (do_polling() && C->is_method_compilation()) {
1985 Label dummy_label;
1986 Label* code_stub = &dummy_label;
1987 if (!C->output()->in_scratch_emit_size()) {
1988 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1989 C->output()->add_stub(stub);
1990 code_stub = &stub->entry();
1991 }
1992 __ relocate(relocInfo::poll_return_type);
1993 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
1994 }
1995 }
1996
1997 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
1998 {
1999 return MachNode::size(ra_); // too many variables; just compute it
2000 // the hard way
2001 }
2002
2003 int MachEpilogNode::reloc() const
2004 {
2005 return 2; // a large enough number
2006 }
2007
2008 const Pipeline* MachEpilogNode::pipeline() const
2009 {
2010 return MachNode::pipeline_class();
2011 }
2012
2013 //=============================================================================
2014
2015 enum RC {
2016 rc_bad,
2017 rc_int,
2018 rc_kreg,
2019 rc_float,
2020 rc_stack
2021 };
2022
2023 static enum RC rc_class(OptoReg::Name reg)
2024 {
  if (!OptoReg::is_valid(reg)) return rc_bad;
2026
2027 if (OptoReg::is_stack(reg)) return rc_stack;
2028
2029 VMReg r = OptoReg::as_VMReg(reg);
2030
2031 if (r->is_Register()) return rc_int;
2032
2033 if (r->is_KRegister()) return rc_kreg;
2034
2035 assert(r->is_XMMRegister(), "must be");
2036 return rc_float;
2037 }
2038
2039 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
2040 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
2041 int src_hi, int dst_hi, uint ireg, outputStream* st);
2042
2043 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
2044 int stack_offset, int reg, uint ireg, outputStream* st);
2045
2046 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2047 int dst_offset, uint ireg, outputStream* st) {
2048 if (masm) {
2049 switch (ireg) {
2050 case Op_VecS:
2051 __ movq(Address(rsp, -8), rax);
2052 __ movl(rax, Address(rsp, src_offset));
2053 __ movl(Address(rsp, dst_offset), rax);
2054 __ movq(rax, Address(rsp, -8));
2055 break;
2056 case Op_VecD:
2057 __ pushq(Address(rsp, src_offset));
2058 __ popq (Address(rsp, dst_offset));
2059 break;
2060 case Op_VecX:
2061 __ pushq(Address(rsp, src_offset));
2062 __ popq (Address(rsp, dst_offset));
2063 __ pushq(Address(rsp, src_offset+8));
2064 __ popq (Address(rsp, dst_offset+8));
2065 break;
2066 case Op_VecY:
2067 __ vmovdqu(Address(rsp, -32), xmm0);
2068 __ vmovdqu(xmm0, Address(rsp, src_offset));
2069 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2070 __ vmovdqu(xmm0, Address(rsp, -32));
2071 break;
2072 case Op_VecZ:
2073 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2074 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2075 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2076 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2077 break;
2078 default:
2079 ShouldNotReachHere();
2080 }
2081 #ifndef PRODUCT
2082 } else {
2083 switch (ireg) {
2084 case Op_VecS:
2085 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2086 "movl rax, [rsp + #%d]\n\t"
2087 "movl [rsp + #%d], rax\n\t"
2088 "movq rax, [rsp - #8]",
2089 src_offset, dst_offset);
2090 break;
2091 case Op_VecD:
2092 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2093 "popq [rsp + #%d]",
2094 src_offset, dst_offset);
2095 break;
2096 case Op_VecX:
2097 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2098 "popq [rsp + #%d]\n\t"
2099 "pushq [rsp + #%d]\n\t"
2100 "popq [rsp + #%d]",
2101 src_offset, dst_offset, src_offset+8, dst_offset+8);
2102 break;
2103 case Op_VecY:
2104 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2105 "vmovdqu xmm0, [rsp + #%d]\n\t"
2106 "vmovdqu [rsp + #%d], xmm0\n\t"
2107 "vmovdqu xmm0, [rsp - #32]",
2108 src_offset, dst_offset);
2109 break;
2110 case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
                src_offset, dst_offset);
2116 break;
2117 default:
2118 ShouldNotReachHere();
2119 }
2120 #endif
2121 }
2122 }
2123
2124 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2125 PhaseRegAlloc* ra_,
2126 bool do_size,
2127 outputStream* st) const {
2128 assert(masm != nullptr || st != nullptr, "sanity");
2129 // Get registers to move
2130 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2131 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2132 OptoReg::Name dst_second = ra_->get_reg_second(this);
2133 OptoReg::Name dst_first = ra_->get_reg_first(this);
2134
2135 enum RC src_second_rc = rc_class(src_second);
2136 enum RC src_first_rc = rc_class(src_first);
2137 enum RC dst_second_rc = rc_class(dst_second);
2138 enum RC dst_first_rc = rc_class(dst_first);
2139
2140 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2141 "must move at least 1 register" );
2142
2143 if (src_first == dst_first && src_second == dst_second) {
2144 // Self copy, no move
2145 return 0;
2146 }
2147 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
2148 uint ireg = ideal_reg();
2149 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2150 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
2152 // mem -> mem
2153 int src_offset = ra_->reg2offset(src_first);
2154 int dst_offset = ra_->reg2offset(dst_first);
2155 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2156 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2157 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2158 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2159 int stack_offset = ra_->reg2offset(dst_first);
2160 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2161 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2162 int stack_offset = ra_->reg2offset(src_first);
2163 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2164 } else {
2165 ShouldNotReachHere();
2166 }
2167 return 0;
2168 }
2169 if (src_first_rc == rc_stack) {
2170 // mem ->
2171 if (dst_first_rc == rc_stack) {
2172 // mem -> mem
2173 assert(src_second != dst_first, "overlap");
2174 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2175 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2176 // 64-bit
2177 int src_offset = ra_->reg2offset(src_first);
2178 int dst_offset = ra_->reg2offset(dst_first);
2179 if (masm) {
2180 __ pushq(Address(rsp, src_offset));
2181 __ popq (Address(rsp, dst_offset));
2182 #ifndef PRODUCT
2183 } else {
2184 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2185 "popq [rsp + #%d]",
2186 src_offset, dst_offset);
2187 #endif
2188 }
2189 } else {
2190 // 32-bit
2191 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2192 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2193 // No pushl/popl, so:
2194 int src_offset = ra_->reg2offset(src_first);
2195 int dst_offset = ra_->reg2offset(dst_first);
2196 if (masm) {
2197 __ movq(Address(rsp, -8), rax);
2198 __ movl(rax, Address(rsp, src_offset));
2199 __ movl(Address(rsp, dst_offset), rax);
2200 __ movq(rax, Address(rsp, -8));
2201 #ifndef PRODUCT
2202 } else {
2203 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2204 "movl rax, [rsp + #%d]\n\t"
2205 "movl [rsp + #%d], rax\n\t"
2206 "movq rax, [rsp - #8]",
2207 src_offset, dst_offset);
2208 #endif
2209 }
2210 }
2211 return 0;
2212 } else if (dst_first_rc == rc_int) {
2213 // mem -> gpr
2214 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2215 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2216 // 64-bit
2217 int offset = ra_->reg2offset(src_first);
2218 if (masm) {
2219 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2220 #ifndef PRODUCT
2221 } else {
2222 st->print("movq %s, [rsp + #%d]\t# spill",
2223 Matcher::regName[dst_first],
2224 offset);
2225 #endif
2226 }
2227 } else {
2228 // 32-bit
2229 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2230 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2231 int offset = ra_->reg2offset(src_first);
2232 if (masm) {
2233 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2234 #ifndef PRODUCT
2235 } else {
2236 st->print("movl %s, [rsp + #%d]\t# spill",
2237 Matcher::regName[dst_first],
2238 offset);
2239 #endif
2240 }
2241 }
2242 return 0;
2243 } else if (dst_first_rc == rc_float) {
2244 // mem-> xmm
2245 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2246 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2247 // 64-bit
2248 int offset = ra_->reg2offset(src_first);
2249 if (masm) {
2250 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2251 #ifndef PRODUCT
2252 } else {
2253 st->print("%s %s, [rsp + #%d]\t# spill",
2254 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2255 Matcher::regName[dst_first],
2256 offset);
2257 #endif
2258 }
2259 } else {
2260 // 32-bit
2261 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2262 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2263 int offset = ra_->reg2offset(src_first);
2264 if (masm) {
2265 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2266 #ifndef PRODUCT
2267 } else {
2268 st->print("movss %s, [rsp + #%d]\t# spill",
2269 Matcher::regName[dst_first],
2270 offset);
2271 #endif
2272 }
2273 }
2274 return 0;
2275 } else if (dst_first_rc == rc_kreg) {
2276 // mem -> kreg
2277 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2278 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2279 // 64-bit
2280 int offset = ra_->reg2offset(src_first);
2281 if (masm) {
2282 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2283 #ifndef PRODUCT
2284 } else {
2285 st->print("kmovq %s, [rsp + #%d]\t# spill",
2286 Matcher::regName[dst_first],
2287 offset);
2288 #endif
2289 }
2290 }
2291 return 0;
2292 }
2293 } else if (src_first_rc == rc_int) {
2294 // gpr ->
2295 if (dst_first_rc == rc_stack) {
2296 // gpr -> mem
2297 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2298 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2299 // 64-bit
2300 int offset = ra_->reg2offset(dst_first);
2301 if (masm) {
2302 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2303 #ifndef PRODUCT
2304 } else {
2305 st->print("movq [rsp + #%d], %s\t# spill",
2306 offset,
2307 Matcher::regName[src_first]);
2308 #endif
2309 }
2310 } else {
2311 // 32-bit
2312 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2313 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2314 int offset = ra_->reg2offset(dst_first);
2315 if (masm) {
2316 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2317 #ifndef PRODUCT
2318 } else {
2319 st->print("movl [rsp + #%d], %s\t# spill",
2320 offset,
2321 Matcher::regName[src_first]);
2322 #endif
2323 }
2324 }
2325 return 0;
2326 } else if (dst_first_rc == rc_int) {
2327 // gpr -> gpr
2328 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2329 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2330 // 64-bit
2331 if (masm) {
2332 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2333 as_Register(Matcher::_regEncode[src_first]));
2334 #ifndef PRODUCT
2335 } else {
2336 st->print("movq %s, %s\t# spill",
2337 Matcher::regName[dst_first],
2338 Matcher::regName[src_first]);
2339 #endif
2340 }
2341 return 0;
2342 } else {
2343 // 32-bit
2344 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2345 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2346 if (masm) {
2347 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2348 as_Register(Matcher::_regEncode[src_first]));
2349 #ifndef PRODUCT
2350 } else {
2351 st->print("movl %s, %s\t# spill",
2352 Matcher::regName[dst_first],
2353 Matcher::regName[src_first]);
2354 #endif
2355 }
2356 return 0;
2357 }
2358 } else if (dst_first_rc == rc_float) {
2359 // gpr -> xmm
2360 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2361 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2362 // 64-bit
2363 if (masm) {
2364 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2365 #ifndef PRODUCT
2366 } else {
2367 st->print("movdq %s, %s\t# spill",
2368 Matcher::regName[dst_first],
2369 Matcher::regName[src_first]);
2370 #endif
2371 }
2372 } else {
2373 // 32-bit
2374 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2375 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2376 if (masm) {
2377 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2378 #ifndef PRODUCT
2379 } else {
2380 st->print("movdl %s, %s\t# spill",
2381 Matcher::regName[dst_first],
2382 Matcher::regName[src_first]);
2383 #endif
2384 }
2385 }
2386 return 0;
2387 } else if (dst_first_rc == rc_kreg) {
2388 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2389 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2390 // 64-bit
2391 if (masm) {
2392 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2393 #ifndef PRODUCT
2394 } else {
2395 st->print("kmovq %s, %s\t# spill",
2396 Matcher::regName[dst_first],
2397 Matcher::regName[src_first]);
2398 #endif
2399 }
2400 }
2401 Unimplemented();
2402 return 0;
2403 }
2404 } else if (src_first_rc == rc_float) {
2405 // xmm ->
2406 if (dst_first_rc == rc_stack) {
2407 // xmm -> mem
2408 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2409 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2410 // 64-bit
2411 int offset = ra_->reg2offset(dst_first);
2412 if (masm) {
2413 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2414 #ifndef PRODUCT
2415 } else {
2416 st->print("movsd [rsp + #%d], %s\t# spill",
2417 offset,
2418 Matcher::regName[src_first]);
2419 #endif
2420 }
2421 } else {
2422 // 32-bit
2423 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2424 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2425 int offset = ra_->reg2offset(dst_first);
2426 if (masm) {
2427 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2428 #ifndef PRODUCT
2429 } else {
2430 st->print("movss [rsp + #%d], %s\t# spill",
2431 offset,
2432 Matcher::regName[src_first]);
2433 #endif
2434 }
2435 }
2436 return 0;
2437 } else if (dst_first_rc == rc_int) {
2438 // xmm -> gpr
2439 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2440 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2441 // 64-bit
2442 if (masm) {
2443 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2444 #ifndef PRODUCT
2445 } else {
2446 st->print("movdq %s, %s\t# spill",
2447 Matcher::regName[dst_first],
2448 Matcher::regName[src_first]);
2449 #endif
2450 }
2451 } else {
2452 // 32-bit
2453 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2454 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2455 if (masm) {
2456 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2457 #ifndef PRODUCT
2458 } else {
2459 st->print("movdl %s, %s\t# spill",
2460 Matcher::regName[dst_first],
2461 Matcher::regName[src_first]);
2462 #endif
2463 }
2464 }
2465 return 0;
2466 } else if (dst_first_rc == rc_float) {
2467 // xmm -> xmm
2468 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2469 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2470 // 64-bit
2471 if (masm) {
2472 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2473 #ifndef PRODUCT
2474 } else {
2475 st->print("%s %s, %s\t# spill",
2476 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2477 Matcher::regName[dst_first],
2478 Matcher::regName[src_first]);
2479 #endif
2480 }
2481 } else {
2482 // 32-bit
2483 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2484 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2485 if (masm) {
2486 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2487 #ifndef PRODUCT
2488 } else {
2489 st->print("%s %s, %s\t# spill",
2490 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2491 Matcher::regName[dst_first],
2492 Matcher::regName[src_first]);
2493 #endif
2494 }
2495 }
2496 return 0;
2497 } else if (dst_first_rc == rc_kreg) {
2498 assert(false, "Illegal spilling");
2499 return 0;
2500 }
2501 } else if (src_first_rc == rc_kreg) {
2502 if (dst_first_rc == rc_stack) {
      // kreg -> mem
2504 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2505 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2506 // 64-bit
2507 int offset = ra_->reg2offset(dst_first);
2508 if (masm) {
2509 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2510 #ifndef PRODUCT
2511 } else {
2512 st->print("kmovq [rsp + #%d] , %s\t# spill",
2513 offset,
2514 Matcher::regName[src_first]);
2515 #endif
2516 }
2517 }
2518 return 0;
2519 } else if (dst_first_rc == rc_int) {
2520 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2521 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2522 // 64-bit
2523 if (masm) {
2524 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2525 #ifndef PRODUCT
2526 } else {
2527 st->print("kmovq %s, %s\t# spill",
2528 Matcher::regName[dst_first],
2529 Matcher::regName[src_first]);
2530 #endif
2531 }
2532 }
2533 Unimplemented();
2534 return 0;
2535 } else if (dst_first_rc == rc_kreg) {
2536 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2537 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2538 // 64-bit
2539 if (masm) {
2540 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2541 #ifndef PRODUCT
2542 } else {
2543 st->print("kmovq %s, %s\t# spill",
2544 Matcher::regName[dst_first],
2545 Matcher::regName[src_first]);
2546 #endif
2547 }
2548 }
2549 return 0;
2550 } else if (dst_first_rc == rc_float) {
2551 assert(false, "Illegal spill");
2552 return 0;
2553 }
2554 }
2555
  assert(false, "unhandled spill combination");
2557 Unimplemented();
2558 return 0;
2559 }
2560
2561 #ifndef PRODUCT
2562 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2563 implementation(nullptr, ra_, false, st);
2564 }
2565 #endif
2566
2567 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2568 implementation(masm, ra_, false, nullptr);
2569 }
2570
2571 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2572 return MachNode::size(ra_);
2573 }
2574
2575 //=============================================================================
2576 #ifndef PRODUCT
2577 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2578 {
2579 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2580 int reg = ra_->get_reg_first(this);
2581 st->print("leaq %s, [rsp + #%d]\t# box lock",
2582 Matcher::regName[reg], offset);
2583 }
2584 #endif
2585
2586 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2587 {
2588 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2589 int reg = ra_->get_encode(this);
2590
2591 __ lea(as_Register(reg), Address(rsp, offset));
2592 }
2593
2594 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2595 {
2596 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
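  // leaq reg, [rsp + offset] encodes as prefix + opcode + ModRM + SIB plus a
  // disp8 (offset < 0x80) or a disp32; encodings above 15 need the longer
  // REX2 prefix.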
2597 if (ra_->get_encode(this) > 15) {
2598 return (offset < 0x80) ? 6 : 9; // REX2
2599 } else {
2600 return (offset < 0x80) ? 5 : 8; // REX
2601 }
2602 }
2603
2604 //=============================================================================
2605 #ifndef PRODUCT
2606 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2607 {
2608 if (UseCompressedClassPointers) {
2609 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2610 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2611 } else {
    st->print_cr("movq rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# klass");
2613 st->print_cr("\tcmpq rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2614 }
2615 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2616 }
2617 #endif
2618
2619 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2620 {
2621 __ ic_check(InteriorEntryAlignment);
2622 }
2623
2624 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2625 {
2626 return MachNode::size(ra_); // too many variables; just compute it
2627 // the hard way
2628 }
2629
2630
2631 //=============================================================================
2632
2633 bool Matcher::supports_vector_calling_convention(void) {
2634 return EnableVectorSupport;
2635 }
2636
2637 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2638 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2639 }
2640
2641 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2642 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2643 }
2644
2645 #ifdef ASSERT
2646 static bool is_ndd_demotable(const MachNode* mdef) {
2647 return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
2648 }
2649 #endif
2650
2651 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2652 int oper_index) {
2653 if (mdef == nullptr) {
2654 return false;
2655 }
2656
2657 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2658 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2659 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2660 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2661 return false;
2662 }
2663
2664 // A complex memory operand covers multiple incoming edges needed for
2665 // address computation. Biasing the def towards any address component will
2666 // not result in NDD demotion by the assembler.
2667 if (mdef->operand_num_edges(oper_index) != 1) {
2668 return false;
2669 }
2670
2671 // Demotion candidate must be register mask compatible with definition.
2672 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2673 if (!oper_mask.overlap(mdef->out_RegMask())) {
2674 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2675 return false;
2676 }
2677
2678 switch (oper_index) {
2679 // The first operand of a MachNode corresponding to an Intel APX NDD
2680 // selection pattern can share its assigned register with the definition
2681 // operand if their live ranges do not overlap. In such a scenario we can
2682 // demote it to a legacy map0/map1 instruction by replacing its 4-byte
2683 // extended EVEX prefix with the shorter REX/REX2 encoding. Demotion
2684 // candidates are decorated with a special flag by the instruction selector.
2685 case 1:
2686 return is_ndd_demotable_opr1(mdef);
2687
2688 // The definition operand of a commutative operation can be biased towards
2689 // its second operand.
2690 case 2:
2691 return is_ndd_demotable_opr2(mdef);
2692
2693 // Current scheme only selects up to two biasing candidates
2694 default:
2695 assert(false, "unhandled operand index: %s", mdef->Name());
2696 break;
2697 }
2698
2699 return false;
2700 }
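// Illustrative example of the demotion described above (assuming the APX NDD
// "dst, src1, src2" operand order): a three-operand form such as
// "add r10d, r11d, r12d" carries a 4-byte extended EVEX prefix; if the
// definition r10d is biased to share a register with the first source r11d,
// the assembler can emit the legacy two-operand "add r11d, r12d" instead,
// which only needs a REX/REX2 prefix.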
2701
2702 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2703 assert(EnableVectorSupport, "sanity");
2704 int lo = XMM0_num;
2705 int hi = XMM0b_num;
2706 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2707 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2708 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2709 return OptoRegPair(hi, lo);
2710 }
2711
2712 // Is this branch offset short enough that a short branch can be used?
2713 //
2714 // NOTE: If the platform does not provide any short branch variants, then
2715 // this method should return false for offset 0.
2716 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2717 // The passed offset is relative to address of the branch.
2718 // On x86 a branch displacement is calculated relative to the address
2719 // of the next instruction.
2720 offset -= br_size;
2721
2722 // The short version of jmpConUCF2 contains multiple branches,
2723 // making the reach slightly shorter.
2724 if (rule == jmpConUCF2_rule)
2725 return (-126 <= offset && offset <= 125);
2726 return (-128 <= offset && offset <= 127);
2727 }
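// Worked example (illustrative): for a 2-byte short branch (br_size == 2)
// whose target lies 129 bytes past the branch address, the displacement
// relative to the next instruction is 129 - 2 = 127, which still fits in
// the signed 8-bit range [-128, 127].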
2728
2729 #ifdef ASSERT
2730 // Return whether or not this register is ever used as an argument.
2731 bool Matcher::can_be_java_arg(int reg)
2732 {
2733 return
2734 reg == RDI_num || reg == RDI_H_num ||
2735 reg == RSI_num || reg == RSI_H_num ||
2736 reg == RDX_num || reg == RDX_H_num ||
2737 reg == RCX_num || reg == RCX_H_num ||
2738 reg == R8_num || reg == R8_H_num ||
2739 reg == R9_num || reg == R9_H_num ||
2740 reg == R12_num || reg == R12_H_num ||
2741 reg == XMM0_num || reg == XMM0b_num ||
2742 reg == XMM1_num || reg == XMM1b_num ||
2743 reg == XMM2_num || reg == XMM2b_num ||
2744 reg == XMM3_num || reg == XMM3b_num ||
2745 reg == XMM4_num || reg == XMM4b_num ||
2746 reg == XMM5_num || reg == XMM5b_num ||
2747 reg == XMM6_num || reg == XMM6b_num ||
2748 reg == XMM7_num || reg == XMM7b_num;
2749 }
2750 #endif
2751
2752 uint Matcher::int_pressure_limit()
2753 {
2754 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2755 }
2756
2757 uint Matcher::float_pressure_limit()
2758 {
2759 // After experimenting with different values, the following default threshold
2760 // works best for LCM's register pressure scheduling on x64.
2761 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2762 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2763 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2764 }
2765
2766 bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
2767 // In 64-bit mode, code that uses multiply when the
2768 // divisor is constant is faster than the hardware
2769 // DIV instruction (it uses MulHiL).
2770 return false;
2771 }
2772
2773 // Register for DIVI projection of divmodI
2774 const RegMask& Matcher::divI_proj_mask() {
2775 return INT_RAX_REG_mask();
2776 }
2777
2778 // Register for MODI projection of divmodI
2779 const RegMask& Matcher::modI_proj_mask() {
2780 return INT_RDX_REG_mask();
2781 }
2782
2783 // Register for DIVL projection of divmodL
2784 const RegMask& Matcher::divL_proj_mask() {
2785 return LONG_RAX_REG_mask();
2786 }
2787
2788 // Register for MODL projection of divmodL
2789 const RegMask& Matcher::modL_proj_mask() {
2790 return LONG_RDX_REG_mask();
2791 }
2792
2793 %}
2794
2795 source_hpp %{
2796 // Header information of the source block.
2797 // Method declarations/definitions which are used outside
2798 // the ad-scope can conveniently be defined here.
2799 //
2800 // To keep related declarations/definitions/uses close together,
2801 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2802
2803 #include "runtime/vm_version.hpp"
2804
2805 class NativeJump;
2806
2807 class CallStubImpl {
2808
2809 //--------------------------------------------------------------
2810 //---< Used for optimization in Compile::shorten_branches >---
2811 //--------------------------------------------------------------
2812
2813 public:
2814 // Size of call trampoline stub.
2815 static uint size_call_trampoline() {
2816 return 0; // no call trampolines on this platform
2817 }
2818
2819 // number of relocations needed by a call trampoline stub
2820 static uint reloc_call_trampoline() {
2821 return 0; // no call trampolines on this platform
2822 }
2823 };
2824
2825 class HandlerImpl {
2826
2827 public:
2828
2829 static int emit_deopt_handler(C2_MacroAssembler* masm);
2830
2831 static uint size_deopt_handler() {
2832 // one call and one jmp.
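// (call rel32 is 5 bytes and the backward jmp to 'start' emitted in
// HandlerImpl::emit_deopt_handler fits the 2-byte short form: 5 + 2 = 7.)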
2833 return 7;
2834 }
2835 };
2836
2837 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2838 switch(bytes) {
2839 case 4: // fall-through
2840 case 8: // fall-through
2841 case 16: return Assembler::AVX_128bit;
2842 case 32: return Assembler::AVX_256bit;
2843 case 64: return Assembler::AVX_512bit;
2844
2845 default: {
2846 ShouldNotReachHere();
2847 return Assembler::AVX_NoVec;
2848 }
2849 }
2850 }
2851
2852 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2853 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2854 }
2855
2856 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2857 uint def_idx = use->operand_index(opnd);
2858 Node* def = use->in(def_idx);
2859 return vector_length_encoding(def);
2860 }
2861
2862 static inline bool is_vector_popcount_predicate(BasicType bt) {
2863 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2864 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2865 }
2866
2867 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2868 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2869 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2870 }
2871
2872 class Node::PD {
2873 public:
2874 enum NodeFlags : uint64_t {
2875 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2876 Flag_sets_carry_flag = Node::_last_flag << 2,
2877 Flag_sets_parity_flag = Node::_last_flag << 3,
2878 Flag_sets_zero_flag = Node::_last_flag << 4,
2879 Flag_sets_overflow_flag = Node::_last_flag << 5,
2880 Flag_sets_sign_flag = Node::_last_flag << 6,
2881 Flag_clears_carry_flag = Node::_last_flag << 7,
2882 Flag_clears_parity_flag = Node::_last_flag << 8,
2883 Flag_clears_zero_flag = Node::_last_flag << 9,
2884 Flag_clears_overflow_flag = Node::_last_flag << 10,
2885 Flag_clears_sign_flag = Node::_last_flag << 11,
2886 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2887 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2888 _last_flag = Flag_ndd_demotable_opr2
2889 };
2890 };
2891
2892 %} // end source_hpp
2893
2894 source %{
2895
2896 #include "opto/addnode.hpp"
2897 #include "c2_intelJccErratum_x86.hpp"
2898
2899 void PhaseOutput::pd_perform_mach_node_analysis() {
2900 if (VM_Version::has_intel_jcc_erratum()) {
2901 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2902 _buf_sizes._code += extra_padding;
2903 }
2904 }
2905
2906 int MachNode::pd_alignment_required() const {
2907 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2908 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2909 return IntelJccErratum::largest_jcc_size() + 1;
2910 } else {
2911 return 1;
2912 }
2913 }
2914
2915 int MachNode::compute_padding(int current_offset) const {
2916 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2917 Compile* C = Compile::current();
2918 PhaseOutput* output = C->output();
2919 Block* block = output->block();
2920 int index = output->index();
2921 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2922 } else {
2923 return 0;
2924 }
2925 }
2926
2927 // Emit deopt handler code.
2928 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2929
2930 // Note that the code buffer's insts_mark is always relative to insts.
2931 // That's why we must use the macroassembler to generate a handler.
2932 address base = __ start_a_stub(size_deopt_handler());
2933 if (base == nullptr) {
2934 ciEnv::current()->record_failure("CodeCache is full");
2935 return 0; // CodeBuffer::expand failed
2936 }
2937 int offset = __ offset();
2938
2939 Label start;
2940 __ bind(start);
2941
2942 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2943
2944 int entry_offset = __ offset();
2945
2946 __ jmp(start);
2947
2948 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2949 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2950 "out of bounds read in post-call NOP check");
2951 __ end_a_stub();
2952 return entry_offset;
2953 }
2954
2955 static Assembler::Width widthForType(BasicType bt) {
2956 if (bt == T_BYTE) {
2957 return Assembler::B;
2958 } else if (bt == T_SHORT) {
2959 return Assembler::W;
2960 } else if (bt == T_INT) {
2961 return Assembler::D;
2962 } else {
2963 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2964 return Assembler::Q;
2965 }
2966 }
2967
2968 //=============================================================================
2969
2970 // Float masks come from different places depending on platform.
2971 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2972 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2973 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2974 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2975 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2976 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2977 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2978 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2979 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2980 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2981 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2982 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2983 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2984 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2985 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2986 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2987 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2988 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2989 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2990
2991 //=============================================================================
2992 bool Matcher::match_rule_supported(int opcode) {
2993 if (!has_match_rule(opcode)) {
2994 return false; // no match rule present
2995 }
2996 switch (opcode) {
2997 case Op_AbsVL:
2998 case Op_StoreVectorScatter:
2999 if (UseAVX < 3) {
3000 return false;
3001 }
3002 break;
3003 case Op_PopCountI:
3004 case Op_PopCountL:
3005 if (!UsePopCountInstruction) {
3006 return false;
3007 }
3008 break;
3009 case Op_PopCountVI:
3010 if (UseAVX < 2) {
3011 return false;
3012 }
3013 break;
3014 case Op_CompressV:
3015 case Op_ExpandV:
3016 case Op_PopCountVL:
3017 if (UseAVX < 2) {
3018 return false;
3019 }
3020 break;
3021 case Op_MulVI:
3022 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3023 return false;
3024 }
3025 break;
3026 case Op_MulVL:
3027 if (UseSSE < 4) { // only with SSE4_1 or AVX
3028 return false;
3029 }
3030 break;
3031 case Op_MulReductionVL:
3032 if (VM_Version::supports_avx512dq() == false) {
3033 return false;
3034 }
3035 break;
3036 case Op_AbsVB:
3037 case Op_AbsVS:
3038 case Op_AbsVI:
3039 case Op_AddReductionVI:
3040 case Op_AndReductionV:
3041 case Op_OrReductionV:
3042 case Op_XorReductionV:
3043 if (UseSSE < 3) { // requires at least SSSE3
3044 return false;
3045 }
3046 break;
3047 case Op_MaxHF:
3048 case Op_MinHF:
3049 if (!VM_Version::supports_avx512vlbw()) {
3050 return false;
3051 } // fallthrough
3052 case Op_AddHF:
3053 case Op_DivHF:
3054 case Op_FmaHF:
3055 case Op_MulHF:
3056 case Op_ReinterpretS2HF:
3057 case Op_ReinterpretHF2S:
3058 case Op_SubHF:
3059 case Op_SqrtHF:
3060 if (!VM_Version::supports_avx512_fp16()) {
3061 return false;
3062 }
3063 break;
3064 case Op_VectorLoadShuffle:
3065 case Op_VectorRearrange:
3066 case Op_MulReductionVI:
3067 if (UseSSE < 4) { // requires at least SSE4
3068 return false;
3069 }
3070 break;
3071 case Op_IsInfiniteF:
3072 case Op_IsInfiniteD:
3073 if (!VM_Version::supports_avx512dq()) {
3074 return false;
3075 }
3076 break;
3077 case Op_SqrtVD:
3078 case Op_SqrtVF:
3079 case Op_VectorMaskCmp:
3080 case Op_VectorCastB2X:
3081 case Op_VectorCastS2X:
3082 case Op_VectorCastI2X:
3083 case Op_VectorCastL2X:
3084 case Op_VectorCastF2X:
3085 case Op_VectorCastD2X:
3086 case Op_VectorUCastB2X:
3087 case Op_VectorUCastS2X:
3088 case Op_VectorUCastI2X:
3089 case Op_VectorMaskCast:
3090 if (UseAVX < 1) { // enabled for AVX only
3091 return false;
3092 }
3093 break;
3094 case Op_PopulateIndex:
3095 if (UseAVX < 2) {
3096 return false;
3097 }
3098 break;
3099 case Op_RoundVF:
3100 if (UseAVX < 2) { // enabled for AVX2 only
3101 return false;
3102 }
3103 break;
3104 case Op_RoundVD:
3105 if (UseAVX < 3) {
3106 return false; // enabled for AVX3 only
3107 }
3108 break;
3109 case Op_CompareAndSwapL:
3110 case Op_CompareAndSwapP:
3111 break;
3112 case Op_StrIndexOf:
3113 if (!UseSSE42Intrinsics) {
3114 return false;
3115 }
3116 break;
3117 case Op_StrIndexOfChar:
3118 if (!UseSSE42Intrinsics) {
3119 return false;
3120 }
3121 break;
3122 case Op_OnSpinWait:
3123 if (VM_Version::supports_on_spin_wait() == false) {
3124 return false;
3125 }
3126 break;
3127 case Op_MulVB:
3128 case Op_LShiftVB:
3129 case Op_RShiftVB:
3130 case Op_URShiftVB:
3131 case Op_VectorInsert:
3132 case Op_VectorLoadMask:
3133 case Op_VectorStoreMask:
3134 case Op_VectorBlend:
3135 if (UseSSE < 4) {
3136 return false;
3137 }
3138 break;
3139 case Op_MaxD:
3140 case Op_MaxF:
3141 case Op_MinD:
3142 case Op_MinF:
3143 if (UseAVX < 1) { // enabled for AVX only
3144 return false;
3145 }
3146 break;
3147 case Op_CacheWB:
3148 case Op_CacheWBPreSync:
3149 case Op_CacheWBPostSync:
3150 if (!VM_Version::supports_data_cache_line_flush()) {
3151 return false;
3152 }
3153 break;
3154 case Op_ExtractB:
3155 case Op_ExtractL:
3156 case Op_ExtractI:
3157 case Op_RoundDoubleMode:
3158 if (UseSSE < 4) {
3159 return false;
3160 }
3161 break;
3162 case Op_RoundDoubleModeV:
3163 if (VM_Version::supports_avx() == false) {
3164 return false; // 128bit vroundpd is not available
3165 }
3166 break;
3167 case Op_LoadVectorGather:
3168 case Op_LoadVectorGatherMasked:
3169 if (UseAVX < 2) {
3170 return false;
3171 }
3172 break;
3173 case Op_FmaF:
3174 case Op_FmaD:
3175 case Op_FmaVD:
3176 case Op_FmaVF:
3177 if (!UseFMA) {
3178 return false;
3179 }
3180 break;
3181 case Op_MacroLogicV:
3182 if (UseAVX < 3 || !UseVectorMacroLogic) {
3183 return false;
3184 }
3185 break;
3186
3187 case Op_VectorCmpMasked:
3188 case Op_VectorMaskGen:
3189 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3190 return false;
3191 }
3192 break;
3193 case Op_VectorMaskFirstTrue:
3194 case Op_VectorMaskLastTrue:
3195 case Op_VectorMaskTrueCount:
3196 case Op_VectorMaskToLong:
3197 if (UseAVX < 1) {
3198 return false;
3199 }
3200 break;
3201 case Op_RoundF:
3202 case Op_RoundD:
3203 break;
3204 case Op_CopySignD:
3205 case Op_CopySignF:
3206 if (UseAVX < 3) {
3207 return false;
3208 }
3209 if (!VM_Version::supports_avx512vl()) {
3210 return false;
3211 }
3212 break;
3213 case Op_CompressBits:
3214 case Op_ExpandBits:
3215 if (!VM_Version::supports_bmi2()) {
3216 return false;
3217 }
3218 break;
3219 case Op_CompressM:
3220 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3221 return false;
3222 }
3223 break;
3224 case Op_ConvF2HF:
3225 case Op_ConvHF2F:
3226 if (!VM_Version::supports_float16()) {
3227 return false;
3228 }
3229 break;
3230 case Op_VectorCastF2HF:
3231 case Op_VectorCastHF2F:
3232 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3233 return false;
3234 }
3235 break;
3236 }
3237 return true; // Match rules are supported by default.
3238 }
3239
3240 //------------------------------------------------------------------------
3241
3242 static inline bool is_pop_count_instr_target(BasicType bt) {
3243 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3244 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3245 }
3246
3247 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3248 return match_rule_supported_vector(opcode, vlen, bt);
3249 }
3250
3251 // Identify extra cases that we might want to provide match rules for vector nodes and
3252 // other intrinsics guarded with vector length (vlen) and element type (bt).
3253 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
3254 if (!match_rule_supported(opcode)) {
3255 return false;
3256 }
3257 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
3258 // * SSE2 supports 128bit vectors for all types;
3259 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
3260 // * AVX2 supports 256bit vectors for all types;
3261 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
3262 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
3263 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
3264 // And MaxVectorSize is taken into account as well.
3265 if (!vector_size_supported(bt, vlen)) {
3266 return false;
3267 }
3268 // Special cases which require vector length follow:
3269 // * implementation limitations
3270 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
3271 // * 128bit vroundpd instruction is present only in AVX1
3272 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
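// e.g. vlen == 8, bt == T_SHORT: 8 * 2 * 8 = 128 bits;
// vlen == 16, bt == T_FLOAT: 16 * 4 * 8 = 512 bits.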
3273 switch (opcode) {
3274 case Op_MaxVHF:
3275 case Op_MinVHF:
3276 if (!VM_Version::supports_avx512bw()) {
3277 return false;
3278 } // fallthrough
3279 case Op_AddVHF:
3280 case Op_DivVHF:
3281 case Op_FmaVHF:
3282 case Op_MulVHF:
3283 case Op_SubVHF:
3284 case Op_SqrtVHF:
3285 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3286 return false;
3287 }
3288 if (!VM_Version::supports_avx512_fp16()) {
3289 return false;
3290 }
3291 break;
3292 case Op_AbsVF:
3293 case Op_NegVF:
3294 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
3295 return false; // 512bit vandps and vxorps are not available
3296 }
3297 break;
3298 case Op_AbsVD:
3299 case Op_NegVD:
3300 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
3301 return false; // 512bit vpmullq, vandpd and vxorpd are not available
3302 }
3303 break;
3304 case Op_RotateRightV:
3305 case Op_RotateLeftV:
3306 if (bt != T_INT && bt != T_LONG) {
3307 return false;
3308 } // fallthrough
3309 case Op_MacroLogicV:
3310 if (!VM_Version::supports_evex() ||
3311 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
3312 return false;
3313 }
3314 break;
3315 case Op_ClearArray:
3316 case Op_VectorMaskGen:
3317 case Op_VectorCmpMasked:
3318 if (!VM_Version::supports_avx512bw()) {
3319 return false;
3320 }
3321 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
3322 return false;
3323 }
3324 break;
3325 case Op_LoadVectorMasked:
3326 case Op_StoreVectorMasked:
3327 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
3328 return false;
3329 }
3330 break;
3331 case Op_UMinV:
3332 case Op_UMaxV:
3333 if (UseAVX == 0) {
3334 return false;
3335 }
3336 break;
3337 case Op_UMinReductionV:
3338 case Op_UMaxReductionV:
3339 if (UseAVX == 0) {
3340 return false;
3341 }
3342 if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
3343 return false;
3344 }
3345 if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
3346 return false;
3347 }
3348 break;
3349 case Op_MaxV:
3350 case Op_MinV:
3351 if (UseSSE < 4 && is_integral_type(bt)) {
3352 return false;
3353 }
3354 if ((bt == T_FLOAT || bt == T_DOUBLE)) {
3355 // Float/Double intrinsics are enabled for AVX family currently.
3356 if (UseAVX == 0) {
3357 return false;
3358 }
3359 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
3360 return false;
3361 }
3362 }
3363 break;
3364 case Op_CallLeafVector:
3365 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
3366 return false;
3367 }
3368 break;
3369 case Op_AddReductionVI:
3370 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
3371 return false;
3372 }
3373 // fallthrough
3374 case Op_AndReductionV:
3375 case Op_OrReductionV:
3376 case Op_XorReductionV:
3377 if (is_subword_type(bt) && (UseSSE < 4)) {
3378 return false;
3379 }
3380 break;
3381 case Op_MinReductionV:
3382 case Op_MaxReductionV:
3383 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
3384 return false;
3385 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
3386 return false;
3387 }
3388 // Float/Double intrinsics enabled for AVX family.
3389 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
3390 return false;
3391 }
3392 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
3393 return false;
3394 }
3395 break;
3396 case Op_VectorBlend:
3397 if (UseAVX == 0 && size_in_bits < 128) {
3398 return false;
3399 }
3400 break;
3401 case Op_VectorTest:
3402 if (UseSSE < 4) {
3403 return false; // Implementation limitation
3404 } else if (size_in_bits < 32) {
3405 return false; // Implementation limitation
3406 }
3407 break;
3408 case Op_VectorLoadShuffle:
3409 case Op_VectorRearrange:
3410 if(vlen == 2) {
3411 return false; // Implementation limitation due to how shuffle is loaded
3412 } else if (size_in_bits == 256 && UseAVX < 2) {
3413 return false; // Implementation limitation
3414 }
3415 break;
3416 case Op_VectorLoadMask:
3417 case Op_VectorMaskCast:
3418 if (size_in_bits == 256 && UseAVX < 2) {
3419 return false; // Implementation limitation
3420 }
3421 // fallthrough
3422 case Op_VectorStoreMask:
3423 if (vlen == 2) {
3424 return false; // Implementation limitation
3425 }
3426 break;
3427 case Op_PopulateIndex:
3428 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
3429 return false;
3430 }
3431 break;
3432 case Op_VectorCastB2X:
3433 case Op_VectorCastS2X:
3434 case Op_VectorCastI2X:
3435 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
3436 return false;
3437 }
3438 break;
3439 case Op_VectorCastL2X:
3440 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
3441 return false;
3442 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
3443 return false;
3444 }
3445 break;
3446 case Op_VectorCastF2X: {
3447 // As per JLS section 5.1.3, narrowing conversions to sub-word types
3448 // happen after an intermediate conversion to integer, and the special
3449 // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
3450 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
3451 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
3452 return false;
3453 }
3454 }
3455 // fallthrough
3456 case Op_VectorCastD2X:
3457 if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
3458 return false;
3459 }
3460 break;
3461 case Op_VectorCastF2HF:
3462 case Op_VectorCastHF2F:
3463 if (!VM_Version::supports_f16c() &&
3464 ((!VM_Version::supports_evex() ||
3465 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
3466 return false;
3467 }
3468 break;
3469 case Op_RoundVD:
3470 if (!VM_Version::supports_avx512dq()) {
3471 return false;
3472 }
3473 break;
3474 case Op_MulReductionVI:
3475 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3476 return false;
3477 }
3478 break;
3479 case Op_LoadVectorGatherMasked:
3480 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3481 return false;
3482 }
3483 if (is_subword_type(bt) &&
3484 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
3485 (size_in_bits < 64) ||
3486 (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
3487 return false;
3488 }
3489 break;
3490 case Op_StoreVectorScatterMasked:
3491 case Op_StoreVectorScatter:
3492 if (is_subword_type(bt)) {
3493 return false;
3494 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3495 return false;
3496 }
3497 // fallthrough
3498 case Op_LoadVectorGather:
3499 if (!is_subword_type(bt) && size_in_bits == 64) {
3500 return false;
3501 }
3502 if (is_subword_type(bt) && size_in_bits < 64) {
3503 return false;
3504 }
3505 break;
3506 case Op_SaturatingAddV:
3507 case Op_SaturatingSubV:
3508 if (UseAVX < 1) {
3509 return false; // Implementation limitation
3510 }
3511 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
3512 return false;
3513 }
3514 break;
3515 case Op_SelectFromTwoVector:
3516 if (size_in_bits < 128) {
3517 return false;
3518 }
3519 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3520 return false;
3521 }
3522 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3523 return false;
3524 }
3525 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3526 return false;
3527 }
3528 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
3529 return false;
3530 }
3531 break;
3532 case Op_MaskAll:
3533 if (!VM_Version::supports_evex()) {
3534 return false;
3535 }
3536 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
3537 return false;
3538 }
3539 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3540 return false;
3541 }
3542 break;
3543 case Op_VectorMaskCmp:
3544 if (vlen < 2 || size_in_bits < 32) {
3545 return false;
3546 }
3547 break;
3548 case Op_CompressM:
3549 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3550 return false;
3551 }
3552 break;
3553 case Op_CompressV:
3554 case Op_ExpandV:
3555 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
3556 return false;
3557 }
3558 if (size_in_bits < 128) {
3559 return false;
3560 } // fallthrough
3561 case Op_VectorLongToMask:
3562 if (UseAVX < 1) {
3563 return false;
3564 }
3565 if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
3566 return false;
3567 }
3568 break;
3569 case Op_SignumVD:
3570 case Op_SignumVF:
3571 if (UseAVX < 1) {
3572 return false;
3573 }
3574 break;
3575 case Op_PopCountVI:
3576 case Op_PopCountVL: {
3577 if (!is_pop_count_instr_target(bt) &&
3578 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
3579 return false;
3580 }
3581 }
3582 break;
3583 case Op_ReverseV:
3584 case Op_ReverseBytesV:
3585 if (UseAVX < 2) {
3586 return false;
3587 }
3588 break;
3589 case Op_CountTrailingZerosV:
3590 case Op_CountLeadingZerosV:
3591 if (UseAVX < 2) {
3592 return false;
3593 }
3594 break;
3595 }
3596 return true; // Match rules are supported by default.
3597 }
3598
3599 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
3600 // The ADLC-based match_rule_supported routine checks for the existence of a
3601 // pattern based on the IR opcode. Most unary/binary/ternary masked operations
3602 // share the IR nodes of their non-masked counterparts, with the mask edge being the differentiator.
3603 // This routine does a strict check on the existence of masked operation patterns
3604 // by returning a default false value for all the other opcodes apart from the
3605 // ones whose masked instruction patterns are defined in this file.
3606 if (!match_rule_supported_vector(opcode, vlen, bt)) {
3607 return false;
3608 }
3609
3610 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
3611 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
3612 return false;
3613 }
3614 switch(opcode) {
3615 // Unary masked operations
3616 case Op_AbsVB:
3617 case Op_AbsVS:
3618 if(!VM_Version::supports_avx512bw()) {
3619 return false; // Implementation limitation
3620 }
3621 case Op_AbsVI:
3622 case Op_AbsVL:
3623 return true;
3624
3625 // Ternary masked operations
3626 case Op_FmaVF:
3627 case Op_FmaVD:
3628 return true;
3629
3630 case Op_MacroLogicV:
3631 if(bt != T_INT && bt != T_LONG) {
3632 return false;
3633 }
3634 return true;
3635
3636 // Binary masked operations
3637 case Op_AddVB:
3638 case Op_AddVS:
3639 case Op_SubVB:
3640 case Op_SubVS:
3641 case Op_MulVS:
3642 case Op_LShiftVS:
3643 case Op_RShiftVS:
3644 case Op_URShiftVS:
3645 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3646 if (!VM_Version::supports_avx512bw()) {
3647 return false; // Implementation limitation
3648 }
3649 return true;
3650
3651 case Op_MulVL:
3652 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3653 if (!VM_Version::supports_avx512dq()) {
3654 return false; // Implementation limitation
3655 }
3656 return true;
3657
3658 case Op_AndV:
3659 case Op_OrV:
3660 case Op_XorV:
3661 case Op_RotateRightV:
3662 case Op_RotateLeftV:
3663 if (bt != T_INT && bt != T_LONG) {
3664 return false; // Implementation limitation
3665 }
3666 return true;
3667
3668 case Op_VectorLoadMask:
3669 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
3670 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3671 return false;
3672 }
3673 return true;
3674
3675 case Op_AddVI:
3676 case Op_AddVL:
3677 case Op_AddVF:
3678 case Op_AddVD:
3679 case Op_SubVI:
3680 case Op_SubVL:
3681 case Op_SubVF:
3682 case Op_SubVD:
3683 case Op_MulVI:
3684 case Op_MulVF:
3685 case Op_MulVD:
3686 case Op_DivVF:
3687 case Op_DivVD:
3688 case Op_SqrtVF:
3689 case Op_SqrtVD:
3690 case Op_LShiftVI:
3691 case Op_LShiftVL:
3692 case Op_RShiftVI:
3693 case Op_RShiftVL:
3694 case Op_URShiftVI:
3695 case Op_URShiftVL:
3696 case Op_LoadVectorMasked:
3697 case Op_StoreVectorMasked:
3698 case Op_LoadVectorGatherMasked:
3699 case Op_StoreVectorScatterMasked:
3700 return true;
3701
3702 case Op_UMinV:
3703 case Op_UMaxV:
3704 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
3705 return false;
3706 } // fallthrough
3707 case Op_MaxV:
3708 case Op_MinV:
3709 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3710 return false; // Implementation limitation
3711 }
3712 if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
3713 return false; // Implementation limitation
3714 }
3715 return true;
3716 case Op_SaturatingAddV:
3717 case Op_SaturatingSubV:
3718 if (!is_subword_type(bt)) {
3719 return false;
3720 }
3721 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
3722 return false; // Implementation limitation
3723 }
3724 return true;
3725
3726 case Op_VectorMaskCmp:
3727 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
3728 return false; // Implementation limitation
3729 }
3730 return true;
3731
3732 case Op_VectorRearrange:
3733 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
3734 return false; // Implementation limitation
3735 }
3736 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
3737 return false; // Implementation limitation
3738 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
3739 return false; // Implementation limitation
3740 }
3741 return true;
3742
3743 // Binary Logical operations
3744 case Op_AndVMask:
3745 case Op_OrVMask:
3746 case Op_XorVMask:
3747 if (vlen > 16 && !VM_Version::supports_avx512bw()) {
3748 return false; // Implementation limitation
3749 }
3750 return true;
3751
3752 case Op_PopCountVI:
3753 case Op_PopCountVL:
3754 if (!is_pop_count_instr_target(bt)) {
3755 return false;
3756 }
3757 return true;
3758
3759 case Op_MaskAll:
3760 return true;
3761
3762 case Op_CountLeadingZerosV:
3763 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
3764 return true;
3765 }
3766 default:
3767 return false;
3768 }
3769 }
3770
3771 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
3772 return false;
3773 }
3774
3775 // Return true if Vector::rearrange needs preparation of the shuffle argument
3776 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3777 switch (elem_bt) {
3778 case T_BYTE: return false;
3779 case T_SHORT: return !VM_Version::supports_avx512bw();
3780 case T_INT: return !VM_Version::supports_avx();
3781 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3782 default:
3783 ShouldNotReachHere();
3784 return false;
3785 }
3786 }
3787
3788 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3789 // Prefer predicate if the mask type is "TypeVectMask".
3790 return vt->isa_vectmask() != nullptr;
3791 }
3792
3793 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3794 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3795 bool legacy = (generic_opnd->opcode() == LEGVEC);
3796 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3797 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3798 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3799 return new legVecZOper();
3800 }
3801 if (legacy) {
3802 switch (ideal_reg) {
3803 case Op_VecS: return new legVecSOper();
3804 case Op_VecD: return new legVecDOper();
3805 case Op_VecX: return new legVecXOper();
3806 case Op_VecY: return new legVecYOper();
3807 case Op_VecZ: return new legVecZOper();
3808 }
3809 } else {
3810 switch (ideal_reg) {
3811 case Op_VecS: return new vecSOper();
3812 case Op_VecD: return new vecDOper();
3813 case Op_VecX: return new vecXOper();
3814 case Op_VecY: return new vecYOper();
3815 case Op_VecZ: return new vecZOper();
3816 }
3817 }
3818 ShouldNotReachHere();
3819 return nullptr;
3820 }
3821
3822 bool Matcher::is_reg2reg_move(MachNode* m) {
3823 switch (m->rule()) {
3824 case MoveVec2Leg_rule:
3825 case MoveLeg2Vec_rule:
3826 case MoveF2VL_rule:
3827 case MoveF2LEG_rule:
3828 case MoveVL2F_rule:
3829 case MoveLEG2F_rule:
3830 case MoveD2VL_rule:
3831 case MoveD2LEG_rule:
3832 case MoveVL2D_rule:
3833 case MoveLEG2D_rule:
3834 return true;
3835 default:
3836 return false;
3837 }
3838 }
3839
3840 bool Matcher::is_generic_vector(MachOper* opnd) {
3841 switch (opnd->opcode()) {
3842 case VEC:
3843 case LEGVEC:
3844 return true;
3845 default:
3846 return false;
3847 }
3848 }
3849
3850 //------------------------------------------------------------------------
3851
3852 const RegMask* Matcher::predicate_reg_mask(void) {
3853 return &_VECTMASK_REG_mask;
3854 }
3855
3856 // Max vector size in bytes. 0 if not supported.
3857 int Matcher::vector_width_in_bytes(BasicType bt) {
3858 assert(is_java_primitive(bt), "only primitive type vectors");
3859 // SSE2 supports 128bit vectors for all types.
3860 // AVX2 supports 256bit vectors for all types.
3861 // EVEX (AVX-512) supports 512bit vectors for all types.
3862 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
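// e.g. UseAVX == 2: (1 << 2) * 8 = 32 bytes; UseAVX == 3: (1 << 3) * 8 = 64
// bytes; otherwise the SSE2 baseline of 16 bytes (adjusted per type below).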
3863 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3864 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3865 size = (UseAVX > 2) ? 64 : 32;
3866 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3867 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3868 // Use flag to limit vector size.
3869 size = MIN2(size,(int)MaxVectorSize);
3870 // Minimum 2 values in vector (or 4 for bytes).
3871 switch (bt) {
3872 case T_DOUBLE:
3873 case T_LONG:
3874 if (size < 16) return 0;
3875 break;
3876 case T_FLOAT:
3877 case T_INT:
3878 if (size < 8) return 0;
3879 break;
3880 case T_BOOLEAN:
3881 if (size < 4) return 0;
3882 break;
3883 case T_CHAR:
3884 if (size < 4) return 0;
3885 break;
3886 case T_BYTE:
3887 if (size < 4) return 0;
3888 break;
3889 case T_SHORT:
3890 if (size < 4) return 0;
3891 break;
3892 default:
3893 ShouldNotReachHere();
3894 }
3895 return size;
3896 }
3897
3898 // Limits on vector size (number of elements) loaded into vector.
3899 int Matcher::max_vector_size(const BasicType bt) {
3900 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3901 }
3902 int Matcher::min_vector_size(const BasicType bt) {
3903 int max_size = max_vector_size(bt);
3904 // Min size which can be loaded into vector is 4 bytes.
3905 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3906 // Support for calling svml double64 vectors
3907 if (bt == T_DOUBLE) {
3908 size = 1;
3909 }
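// At this point, e.g.: T_BYTE -> 4 elements, T_INT -> 2 elements,
// T_DOUBLE -> 1 element (to allow the svml double64 calls noted above).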
3910 return MIN2(size,max_size);
3911 }
3912
3913 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3914 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3915 // by default on Cascade Lake
3916 if (VM_Version::is_default_intel_cascade_lake()) {
3917 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3918 }
3919 return Matcher::max_vector_size(bt);
3920 }
3921
3922 int Matcher::scalable_vector_reg_size(const BasicType bt) {
3923 return -1;
3924 }
3925
3926 // Vector ideal reg corresponding to specified size in bytes
3927 uint Matcher::vector_ideal_reg(int size) {
3928 assert(MaxVectorSize >= size, "");
3929 switch(size) {
3930 case 4: return Op_VecS;
3931 case 8: return Op_VecD;
3932 case 16: return Op_VecX;
3933 case 32: return Op_VecY;
3934 case 64: return Op_VecZ;
3935 }
3936 ShouldNotReachHere();
3937 return 0;
3938 }
3939
3940 // Check for shift by small constant as well
3941 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3942 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3943 shift->in(2)->get_int() <= 3 &&
3944 // Are there other uses besides address expressions?
3945 !matcher->is_visited(shift)) {
3946 address_visited.set(shift->_idx); // Flag as address_visited
3947 mstack.push(shift->in(2), Matcher::Visit);
3948 Node *conv = shift->in(1);
3949 // Allow the Matcher to match the rule which bypasses the
3950 // ConvI2L operation for an array index on LP64
3951 // if the index value is positive.
3952 if (conv->Opcode() == Op_ConvI2L &&
3953 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3954 // Are there other uses besides address expressions?
3955 !matcher->is_visited(conv)) {
3956 address_visited.set(conv->_idx); // Flag as address_visited
3957 mstack.push(conv->in(1), Matcher::Pre_Visit);
3958 } else {
3959 mstack.push(conv, Matcher::Pre_Visit);
3960 }
3961 return true;
3962 }
3963 return false;
3964 }
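// Illustrative example: for a long[] element access the address subtree
// (AddP base (LShiftX (ConvI2L index) 3)) can be folded into the complex
// addressing mode [base + index*8] once the shift (and, for a provably
// non-negative index, the ConvI2L) is cloned alongside its address uses.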
3965
3966 // This function identifies sub-graphs in which a 'load' node is
3967 // input to two different nodes, and such that it can be matched
3968 // with BMI instructions like blsi, blsr, etc.
3969 // Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
3970 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3971 // refers to the same node.
3972 //
3973 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3974 // This is a temporary solution until we make DAGs expressible in ADL.
3975 template<typename ConType>
3976 class FusedPatternMatcher {
3977 Node* _op1_node;
3978 Node* _mop_node;
3979 int _con_op;
3980
3981 static int match_next(Node* n, int next_op, int next_op_idx) {
3982 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3983 return -1;
3984 }
3985
3986 if (next_op_idx == -1) { // n is commutative, try rotations
3987 if (n->in(1)->Opcode() == next_op) {
3988 return 1;
3989 } else if (n->in(2)->Opcode() == next_op) {
3990 return 2;
3991 }
3992 } else {
3993 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3994 if (n->in(next_op_idx)->Opcode() == next_op) {
3995 return next_op_idx;
3996 }
3997 }
3998 return -1;
3999 }
4000
4001 public:
4002 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
4003 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
4004
4005 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4006 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4007 typename ConType::NativeType con_value) {
4008 if (_op1_node->Opcode() != op1) {
4009 return false;
4010 }
4011 if (_mop_node->outcnt() > 2) {
4012 return false;
4013 }
4014 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4015 if (op1_op2_idx == -1) {
4016 return false;
4017 }
4018 // Memory operation must be the other edge
4019 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4020
4021 // Check that the mop node is really what we want
4022 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4023 Node* op2_node = _op1_node->in(op1_op2_idx);
4024 if (op2_node->outcnt() > 1) {
4025 return false;
4026 }
4027 assert(op2_node->Opcode() == op2, "Should be");
4028 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4029 if (op2_con_idx == -1) {
4030 return false;
4031 }
4032 // Memory operation must be the other edge
4033 int op2_mop_idx = (op2_con_idx & 1) + 1;
4034 // Check that the memory operation is the same node
4035 if (op2_node->in(op2_mop_idx) == _mop_node) {
4036 // Now check the constant
4037 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4038 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4039 return true;
4040 }
4041 }
4042 }
4043 return false;
4044 }
4045 };
4046
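// The three patterns matched below map onto the BMI1 instructions
// (illustrative): x & (0 - x) -> blsi (isolate lowest set bit),
// x & (x + (-1)) -> blsr (reset lowest set bit), and
// x ^ (x + (-1)) -> blsmsk (mask up to and including lowest set bit).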
4047 static bool is_bmi_pattern(Node* n, Node* m) {
4048 assert(UseBMI1Instructions, "sanity");
4049 if (n != nullptr && m != nullptr) {
4050 if (m->Opcode() == Op_LoadI) {
4051 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4052 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4053 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4054 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4055 } else if (m->Opcode() == Op_LoadL) {
4056 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4057 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4058 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4059 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4060 }
4061 }
4062 return false;
4063 }
4064
4065 // Should the matcher clone input 'm' of node 'n'?
4066 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4067 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4068 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
4069 mstack.push(m, Visit);
4070 return true;
4071 }
4072 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4073 mstack.push(m, Visit); // m = ShiftCntV
4074 return true;
4075 }
4076 if (is_encode_and_store_pattern(n, m)) {
4077 mstack.push(m, Visit);
4078 return true;
4079 }
4080 return false;
4081 }
4082
4083 // Should the Matcher clone shifts on addressing modes, expecting them
4084 // to be subsumed into complex addressing expressions or compute them
4085 // into registers?
4086 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
4087 Node *off = m->in(AddPNode::Offset);
4088 if (off->is_Con()) {
4089 address_visited.test_set(m->_idx); // Flag as address_visited
4090 Node *adr = m->in(AddPNode::Address);
4091
4092 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
4093 // AtomicAdd is not an addressing expression.
4094 // Cheap to find it by looking for screwy base.
4095 if (adr->is_AddP() &&
4096 !adr->in(AddPNode::Base)->is_top() &&
4097 !adr->in(AddPNode::Offset)->is_Con() &&
4098 off->get_long() == (int) (off->get_long()) && // immL32
4099 // Are there other uses besides address expressions?
4100 !is_visited(adr)) {
4101 address_visited.set(adr->_idx); // Flag as address_visited
4102 Node *shift = adr->in(AddPNode::Offset);
4103 if (!clone_shift(shift, this, mstack, address_visited)) {
4104 mstack.push(shift, Pre_Visit);
4105 }
4106 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
4107 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
4108 } else {
4109 mstack.push(adr, Pre_Visit);
4110 }
4111
4112 // Clone X+offset as it also folds into most addressing expressions
4113 mstack.push(off, Visit);
4114 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4115 return true;
4116 } else if (clone_shift(off, this, mstack, address_visited)) {
4117 address_visited.test_set(m->_idx); // Flag as address_visited
4118 mstack.push(m->in(AddPNode::Address), Pre_Visit);
4119 mstack.push(m->in(AddPNode::Base), Pre_Visit);
4120 return true;
4121 }
4122 return false;
4123 }
4124
4125 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4126 switch (bt) {
4127 case BoolTest::eq:
4128 return Assembler::eq;
4129 case BoolTest::ne:
4130 return Assembler::neq;
4131 case BoolTest::le:
4132 case BoolTest::ule:
4133 return Assembler::le;
4134 case BoolTest::ge:
4135 case BoolTest::uge:
4136 return Assembler::nlt;
4137 case BoolTest::lt:
4138 case BoolTest::ult:
4139 return Assembler::lt;
4140 case BoolTest::gt:
4141 case BoolTest::ugt:
4142 return Assembler::nle;
4143 default : ShouldNotReachHere(); return Assembler::_false;
4144 }
4145 }
4146
4147 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4148 switch (bt) {
4149 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4150 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4151 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4152 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4153 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4154 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4155 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4156 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4157 }
4158 }
4159
4160 // Helper methods for MachSpillCopyNode::implementation().
4161 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4162 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4163 assert(ireg == Op_VecS || // 32bit vector
4164 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4165 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4166 "no non-adjacent vector moves" );
4167 if (masm) {
4168 switch (ireg) {
4169 case Op_VecS: // copy whole register
4170 case Op_VecD:
4171 case Op_VecX:
4172 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4173 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4174 } else {
4175 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4176 }
4177 break;
4178 case Op_VecY:
4179 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4180 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4181 } else {
4182 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4183 }
4184 break;
4185 case Op_VecZ:
4186 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4187 break;
4188 default:
4189 ShouldNotReachHere();
4190 }
4191 #ifndef PRODUCT
4192 } else {
4193 switch (ireg) {
4194 case Op_VecS:
4195 case Op_VecD:
4196 case Op_VecX:
4197 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4198 break;
4199 case Op_VecY:
4200 case Op_VecZ:
4201 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4202 break;
4203 default:
4204 ShouldNotReachHere();
4205 }
4206 #endif
4207 }
4208 }
4209
4210 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4211 int stack_offset, int reg, uint ireg, outputStream* st) {
4212 if (masm) {
4213 if (is_load) {
4214 switch (ireg) {
4215 case Op_VecS:
4216 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4217 break;
4218 case Op_VecD:
4219 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4220 break;
4221 case Op_VecX:
4222 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4223 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4224 } else {
4225 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4226 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4227 }
4228 break;
4229 case Op_VecY:
4230 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4231 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4232 } else {
4233 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4234 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4235 }
4236 break;
4237 case Op_VecZ:
4238 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4239 break;
4240 default:
4241 ShouldNotReachHere();
4242 }
4243 } else { // store
4244 switch (ireg) {
4245 case Op_VecS:
4246 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4247 break;
4248 case Op_VecD:
4249 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4250 break;
4251 case Op_VecX:
4252 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4253 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4254 }
4255 else {
4256 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4257 }
4258 break;
4259 case Op_VecY:
4260 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4261 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4262 }
4263 else {
4264 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4265 }
4266 break;
4267 case Op_VecZ:
4268 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4269 break;
4270 default:
4271 ShouldNotReachHere();
4272 }
4273 }
4274 #ifndef PRODUCT
4275 } else {
4276 if (is_load) {
4277 switch (ireg) {
4278 case Op_VecS:
4279 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4280 break;
4281 case Op_VecD:
4282 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4283 break;
4284 case Op_VecX:
4285 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4286 break;
4287 case Op_VecY:
4288 case Op_VecZ:
4289 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4290 break;
4291 default:
4292 ShouldNotReachHere();
4293 }
4294 } else { // store
4295 switch (ireg) {
4296 case Op_VecS:
4297 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4298 break;
4299 case Op_VecD:
4300 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4301 break;
4302 case Op_VecX:
4303 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4304 break;
4305 case Op_VecY:
4306 case Op_VecZ:
4307 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4308 break;
4309 default:
4310 ShouldNotReachHere();
4311 }
4312 }
4313 #endif
4314 }
4315 }
4316
4317 template <class T>
4318 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4319 int size = type2aelembytes(bt) * len;
4320 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4321 for (int i = 0; i < len; i++) {
4322 int offset = i * type2aelembytes(bt);
4323 switch (bt) {
4324 case T_BYTE: val->at(i) = con; break;
4325 case T_SHORT: {
4326 jshort c = con;
4327 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4328 break;
4329 }
4330 case T_INT: {
4331 jint c = con;
4332 memcpy(val->adr_at(offset), &c, sizeof(jint));
4333 break;
4334 }
4335 case T_LONG: {
4336 jlong c = con;
4337 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4338 break;
4339 }
4340 case T_FLOAT: {
4341 jfloat c = con;
4342 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4343 break;
4344 }
4345 case T_DOUBLE: {
4346 jdouble c = con;
4347 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4348 break;
4349 }
4350 default: assert(false, "%s", type2name(bt));
4351 }
4352 }
4353 return val;
4354 }
4355
4356 static inline jlong high_bit_set(BasicType bt) {
4357 switch (bt) {
4358 case T_BYTE: return 0x8080808080808080;
4359 case T_SHORT: return 0x8000800080008000;
4360 case T_INT: return 0x8000000080000000;
4361 case T_LONG: return 0x8000000000000000;
4362 default:
4363 ShouldNotReachHere();
4364 return 0;
4365 }
4366 }
4367
4368 #ifndef PRODUCT
4369 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4370 st->print("nop \t# %d bytes pad for loops and calls", _count);
4371 }
4372 #endif
4373
4374 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4375 __ nop(_count);
4376 }
4377
4378 uint MachNopNode::size(PhaseRegAlloc*) const {
4379 return _count;
4380 }
4381
4382 #ifndef PRODUCT
4383 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4384 st->print("# breakpoint");
4385 }
4386 #endif
4387
4388 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4389 __ int3();
4390 }
4391
4392 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4393 return MachNode::size(ra_);
4394 }
4395
4396 %}
4397
4398 //----------ENCODING BLOCK-----------------------------------------------------
4399 // This block specifies the encoding classes used by the compiler to
4400 // output byte streams. Encoding classes are parameterized macros
4401 // used by Machine Instruction Nodes in order to generate the bit
4402 // encoding of the instruction. Operands specify their base encoding
4403 // interface with the interface keyword. There are currently four
4404 // supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
4405 // COND_INTER. REG_INTER causes an operand to generate a function
4406 // which returns its register number when queried. CONST_INTER causes
4407 // an operand to generate a function which returns the value of the
4408 // constant when queried. MEMORY_INTER causes an operand to generate
4409 // four functions which return the Base Register, the Index Register,
4410 // the Scale Value, and the Offset Value of the operand when queried.
4411 // COND_INTER causes an operand to generate six functions which return
4412 // the encoding code (ie - encoding bits for the instruction)
4413 // associated with each basic boolean condition for a conditional
4414 // instruction.
4415 //
// Instructions specify two basic values for encoding. A function is
// also available to check whether a constant displacement is an oop.
// Instructions use the ins_encode keyword to specify their encoding
4419 // classes (which must be a sequence of enc_class names, and their
4420 // parameters, specified in the encoding block), and they use the
4421 // opcode keyword to specify, in order, their primary, secondary, and
4422 // tertiary opcode. Only the opcode sections which a particular
4423 // instruction needs for encoding need to be specified.
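//
// As a schematic sketch (not an actual rule quoted from this file), an
// instruction that uses the cdql_enc class defined below would look like:
//
//   instruct divI_example(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
//   %{
//     match(Set rax (DivI rax div));
//     ...
//     ins_encode(cdql_enc(div));
//     ...
//   %}
//
// where 'cdql_enc' names the enc_class and 'div' is bound to its parameter.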
4424 encode %{
4425 enc_class cdql_enc(no_rax_rdx_RegI div)
4426 %{
4427 // Full implementation of Java idiv and irem; checks for
4428 // special case as described in JVM spec., p.243 & p.271.
4429 //
4430 // normal case special case
4431 //
4432 // input : rax: dividend min_int
4433 // reg: divisor -1
4434 //
4435 // output: rax: quotient (= rax idiv reg) min_int
4436 // rdx: remainder (= rax irem reg) 0
4437 //
// Code sequence:
4439 //
4440 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4441 // 5: 75 07/08 jne e <normal>
4442 // 7: 33 d2 xor %edx,%edx
4443 // [div >= 8 -> offset + 1]
4444 // [REX_B]
4445 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4446 // c: 74 03/04 je 11 <done>
4447 // 000000000000000e <normal>:
4448 // e: 99 cltd
4449 // [div >= 8 -> offset + 1]
4450 // [REX_B]
4451 // f: f7 f9 idiv $div
4452 // 0000000000000011 <done>:
4453 Label normal;
4454 Label done;
4455
4456 // cmp $0x80000000,%eax
4457 __ cmpl(as_Register(RAX_enc), 0x80000000);
4458
4459 // jne e <normal>
4460 __ jccb(Assembler::notEqual, normal);
4461
4462 // xor %edx,%edx
4463 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4464
// cmp $0xffffffffffffffff,$div
4466 __ cmpl($div$$Register, -1);
4467
4468 // je 11 <done>
4469 __ jccb(Assembler::equal, done);
4470
4471 // <normal>
4472 // cltd
4473 __ bind(normal);
4474 __ cdql();
4475
4476 // idivl
4477 // <done>
4478 __ idivl($div$$Register);
4479 __ bind(done);
4480 %}
4481
4482 enc_class cdqq_enc(no_rax_rdx_RegL div)
4483 %{
4484 // Full implementation of Java ldiv and lrem; checks for
4485 // special case as described in JVM spec., p.243 & p.271.
4486 //
4487 // normal case special case
4488 //
4489 // input : rax: dividend min_long
4490 // reg: divisor -1
4491 //
4492 // output: rax: quotient (= rax idiv reg) min_long
4493 // rdx: remainder (= rax irem reg) 0
4494 //
// Code sequence:
4496 //
4497 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4498 // 7: 00 00 80
4499 // a: 48 39 d0 cmp %rdx,%rax
4500 // d: 75 08 jne 17 <normal>
4501 // f: 33 d2 xor %edx,%edx
4502 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4503 // 15: 74 05 je 1c <done>
4504 // 0000000000000017 <normal>:
4505 // 17: 48 99 cqto
4506 // 19: 48 f7 f9 idiv $div
4507 // 000000000000001c <done>:
4508 Label normal;
4509 Label done;
4510
4511 // mov $0x8000000000000000,%rdx
4512 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4513
4514 // cmp %rdx,%rax
4515 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4516
4517 // jne 17 <normal>
4518 __ jccb(Assembler::notEqual, normal);
4519
4520 // xor %edx,%edx
4521 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4522
4523 // cmp $0xffffffffffffffff,$div
4524 __ cmpq($div$$Register, -1);
4525
// je 1c <done>
4527 __ jccb(Assembler::equal, done);
4528
4529 // <normal>
4530 // cqto
4531 __ bind(normal);
4532 __ cdqq();
4533
// idivq
4535 // <done>
4536 __ idivq($div$$Register);
4537 __ bind(done);
4538 %}
4539
4540 enc_class clear_avx %{
4541 DEBUG_ONLY(int off0 = __ offset());
4542 if (generate_vzeroupper(Compile::current())) {
// Clear upper bits of YMM registers when the current compiled code uses
// wide vectors, to avoid the AVX <-> SSE transition penalty during calls.
4546 __ vzeroupper();
4547 }
4548 DEBUG_ONLY(int off1 = __ offset());
4549 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4550 %}
4551
4552 enc_class Java_To_Runtime(method meth) %{
4553 __ lea(r10, RuntimeAddress((address)$meth$$method));
4554 __ call(r10);
4555 __ post_call_nop();
4556 %}
4557
4558 enc_class Java_Static_Call(method meth)
4559 %{
4560 // JAVA STATIC CALL
4561 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4562 // determine who we intended to call.
4563 if (!_method) {
4564 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4565 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4566 // The NOP here is purely to ensure that eliding a call to
4567 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4568 __ addr_nop_5();
4569 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4570 } else {
4571 int method_index = resolved_method_index(masm);
4572 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4573 : static_call_Relocation::spec(method_index);
4574 address mark = __ pc();
4575 int call_offset = __ offset();
4576 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4577 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4578 // Calls of the same statically bound method can share
4579 // a stub to the interpreter.
4580 __ code()->shared_stub_to_interp_for(_method, call_offset);
4581 } else {
4582 // Emit stubs for static call.
4583 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4584 __ clear_inst_mark();
4585 if (stub == nullptr) {
4586 ciEnv::current()->record_failure("CodeCache is full");
4587 return;
4588 }
4589 }
4590 }
4591 __ post_call_nop();
4592 %}
4593
4594 enc_class Java_Dynamic_Call(method meth) %{
4595 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4596 __ post_call_nop();
4597 %}
4598
4599 enc_class call_epilog %{
4600 if (VerifyStackAtCalls) {
// Check that stack depth is unchanged: find magic cookie on stack
4602 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4603 Label L;
4604 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4605 __ jccb(Assembler::equal, L);
4606 // Die if stack mismatch
4607 __ int3();
4608 __ bind(L);
4609 }
4610 %}
4611
4612 %}
4613
4614 //----------FRAME--------------------------------------------------------------
4615 // Definition of frame structure and management information.
4616 //
4617 // S T A C K L A Y O U T Allocators stack-slot number
4618 // | (to get allocators register number
4619 // G Owned by | | v add OptoReg::stack0())
4620 // r CALLER | |
4621 // o | +--------+ pad to even-align allocators stack-slot
4622 // w V | pad0 | numbers; owned by CALLER
4623 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4624 // h ^ | in | 5
4625 // | | args | 4 Holes in incoming args owned by SELF
4626 // | | | | 3
4627 // | | +--------+
4628 // V | | old out| Empty on Intel, window on Sparc
4629 // | old |preserve| Must be even aligned.
4630 // | SP-+--------+----> Matcher::_old_SP, even aligned
4631 // | | in | 3 area for Intel ret address
4632 // Owned by |preserve| Empty on Sparc.
4633 // SELF +--------+
4634 // | | pad2 | 2 pad to align old SP
4635 // | +--------+ 1
4636 // | | locks | 0
4637 // | +--------+----> OptoReg::stack0(), even aligned
4638 // | | pad1 | 11 pad to align new SP
4639 // | +--------+
4640 // | | | 10
4641 // | | spills | 9 spills
4642 // V | | 8 (pad0 slot for callee)
4643 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4644 // ^ | out | 7
4645 // | | args | 6 Holes in outgoing args owned by CALLEE
4646 // Owned by +--------+
4647 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4648 // | new |preserve| Must be even-aligned.
4649 // | SP-+--------+----> Matcher::_new_SP, even aligned
4650 // | | |
4651 //
4652 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4653 // known from SELF's arguments and the Java calling convention.
4654 // Region 6-7 is determined per call site.
4655 // Note 2: If the calling convention leaves holes in the incoming argument
4656 // area, those holes are owned by SELF. Holes in the outgoing area
4657 // are owned by the CALLEE. Holes should not be necessary in the
4658 // incoming area, as the Java calling convention is completely under
4659 // the control of the AD file. Doubles can be sorted and packed to
4660 // avoid holes. Holes in the outgoing arguments may be necessary for
4661 // varargs C calling conventions.
4662 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4663 // even aligned with pad0 as needed.
4664 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4665 // region 6-11 is even aligned; it may be padded out more so that
4666 // the region from SP to FP meets the minimum stack alignment.
4667 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4668 // alignment. Region 11, pad1, may be dynamically extended so that
4669 // SP meets the minimum alignment.
4670
4671 frame
4672 %{
// This register defines part of the calling convention
// between compiled code and the interpreter.
4675 inline_cache_reg(RAX); // Inline Cache Register
4676
4677 // Optional: name the operand used by cisc-spilling to access
4678 // [stack_pointer + offset]
4679 cisc_spilling_operand_name(indOffset32);
4680
4681 // Number of stack slots consumed by locking an object
4682 sync_stack_slots(2);
4683
4684 // Compiled code's Frame Pointer
4685 frame_pointer(RSP);
4686
4687 // Stack alignment requirement
4688 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4689
4690 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4691 // for calls to C. Supports the var-args backing area for register parms.
4692 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4693
4694 // The after-PROLOG location of the return address. Location of
4695 // return address specifies a type (REG or STACK) and a number
4696 // representing the register number (i.e. - use a register name) or
4697 // stack slot.
4698 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4699 // Otherwise, it is above the locks and verification slot and alignment word
4700 return_addr(STACK - 2 +
4701 align_up((Compile::current()->in_preserve_stack_slots() +
4702 Compile::current()->fixed_slots()),
4703 stack_alignment_in_slots()));
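// Illustration only (the slot counts are assumed values, not taken from this
// file): with in_preserve_stack_slots() == 4, fixed_slots() == 0 and a 4-slot
// stack alignment, this evaluates to STACK - 2 + align_up(4, 4) = STACK + 2.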
4704
4705 // Location of compiled Java return values. Same as C for now.
4706 return_value
4707 %{
4708 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4709 "only return normal values");
4710
4711 static const int lo[Op_RegL + 1] = {
4712 0,
4713 0,
4714 RAX_num, // Op_RegN
4715 RAX_num, // Op_RegI
4716 RAX_num, // Op_RegP
4717 XMM0_num, // Op_RegF
4718 XMM0_num, // Op_RegD
4719 RAX_num // Op_RegL
4720 };
4721 static const int hi[Op_RegL + 1] = {
4722 0,
4723 0,
4724 OptoReg::Bad, // Op_RegN
4725 OptoReg::Bad, // Op_RegI
4726 RAX_H_num, // Op_RegP
4727 OptoReg::Bad, // Op_RegF
4728 XMM0b_num, // Op_RegD
4729 RAX_H_num // Op_RegL
4730 };
4731 // Excluded flags and vector registers.
4732 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4733 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4734 %}
4735 %}
4736
4737 //----------ATTRIBUTES---------------------------------------------------------
4738 //----------Operand Attributes-------------------------------------------------
4739 op_attrib op_cost(0); // Required cost attribute
4740
4741 //----------Instruction Attributes---------------------------------------------
4742 ins_attrib ins_cost(100); // Required cost attribute
4743 ins_attrib ins_size(8); // Required size attribute (in bits)
4744 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4745 // a non-matching short branch variant
4746 // of some long branch?
4747 ins_attrib ins_alignment(1); // Required alignment attribute (must
4748 // be a power of 2) specifies the
4749 // alignment that some part of the
4750 // instruction (not necessarily the
4751 // start) requires. If > 1, a
4752 // compute_padding() function must be
4753 // provided for the instruction
4754
4755 // Whether this node is expanded during code emission into a sequence of
4756 // instructions and the first instruction can perform an implicit null check.
4757 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4758
4759 //----------OPERANDS-----------------------------------------------------------
4760 // Operand definitions must precede instruction definitions for correct parsing
4761 // in the ADLC because operands constitute user defined types which are used in
4762 // instruction definitions.
4763
4764 //----------Simple Operands----------------------------------------------------
4765 // Immediate Operands
4766 // Integer Immediate
4767 operand immI()
4768 %{
4769 match(ConI);
4770
4771 op_cost(10);
4772 format %{ %}
4773 interface(CONST_INTER);
4774 %}
4775
4776 // Constant for test vs zero
4777 operand immI_0()
4778 %{
4779 predicate(n->get_int() == 0);
4780 match(ConI);
4781
4782 op_cost(0);
4783 format %{ %}
4784 interface(CONST_INTER);
4785 %}
4786
4787 // Constant for increment
4788 operand immI_1()
4789 %{
4790 predicate(n->get_int() == 1);
4791 match(ConI);
4792
4793 op_cost(0);
4794 format %{ %}
4795 interface(CONST_INTER);
4796 %}
4797
4798 // Constant for decrement
4799 operand immI_M1()
4800 %{
4801 predicate(n->get_int() == -1);
4802 match(ConI);
4803
4804 op_cost(0);
4805 format %{ %}
4806 interface(CONST_INTER);
4807 %}
4808
4809 operand immI_2()
4810 %{
4811 predicate(n->get_int() == 2);
4812 match(ConI);
4813
4814 op_cost(0);
4815 format %{ %}
4816 interface(CONST_INTER);
4817 %}
4818
4819 operand immI_4()
4820 %{
4821 predicate(n->get_int() == 4);
4822 match(ConI);
4823
4824 op_cost(0);
4825 format %{ %}
4826 interface(CONST_INTER);
4827 %}
4828
4829 operand immI_8()
4830 %{
4831 predicate(n->get_int() == 8);
4832 match(ConI);
4833
4834 op_cost(0);
4835 format %{ %}
4836 interface(CONST_INTER);
4837 %}
4838
4839 // Valid scale values for addressing modes
4840 operand immI2()
4841 %{
4842 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4843 match(ConI);
4844
4845 format %{ %}
4846 interface(CONST_INTER);
4847 %}
4848
4849 operand immU7()
4850 %{
4851 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4852 match(ConI);
4853
4854 op_cost(5);
4855 format %{ %}
4856 interface(CONST_INTER);
4857 %}
4858
4859 operand immI8()
4860 %{
4861 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4862 match(ConI);
4863
4864 op_cost(5);
4865 format %{ %}
4866 interface(CONST_INTER);
4867 %}
4868
4869 operand immU8()
4870 %{
4871 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4872 match(ConI);
4873
4874 op_cost(5);
4875 format %{ %}
4876 interface(CONST_INTER);
4877 %}
4878
4879 operand immI16()
4880 %{
4881 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4882 match(ConI);
4883
4884 op_cost(10);
4885 format %{ %}
4886 interface(CONST_INTER);
4887 %}
4888
4889 // Int Immediate non-negative
4890 operand immU31()
4891 %{
4892 predicate(n->get_int() >= 0);
4893 match(ConI);
4894
4895 op_cost(0);
4896 format %{ %}
4897 interface(CONST_INTER);
4898 %}
4899
4900 // Pointer Immediate
4901 operand immP()
4902 %{
4903 match(ConP);
4904
4905 op_cost(10);
4906 format %{ %}
4907 interface(CONST_INTER);
4908 %}
4909
4910 // Null Pointer Immediate
4911 operand immP0()
4912 %{
4913 predicate(n->get_ptr() == 0);
4914 match(ConP);
4915
4916 op_cost(5);
4917 format %{ %}
4918 interface(CONST_INTER);
4919 %}
4920
4921 // Pointer Immediate
4922 operand immN() %{
4923 match(ConN);
4924
4925 op_cost(10);
4926 format %{ %}
4927 interface(CONST_INTER);
4928 %}
4929
4930 operand immNKlass() %{
4931 match(ConNKlass);
4932
4933 op_cost(10);
4934 format %{ %}
4935 interface(CONST_INTER);
4936 %}
4937
4938 // Null Pointer Immediate
4939 operand immN0() %{
4940 predicate(n->get_narrowcon() == 0);
4941 match(ConN);
4942
4943 op_cost(5);
4944 format %{ %}
4945 interface(CONST_INTER);
4946 %}
4947
4948 operand immP31()
4949 %{
4950 predicate(n->as_Type()->type()->reloc() == relocInfo::none
4951 && (n->get_ptr() >> 31) == 0);
4952 match(ConP);
4953
4954 op_cost(5);
4955 format %{ %}
4956 interface(CONST_INTER);
4957 %}
4958
4959
4960 // Long Immediate
4961 operand immL()
4962 %{
4963 match(ConL);
4964
4965 op_cost(20);
4966 format %{ %}
4967 interface(CONST_INTER);
4968 %}
4969
4970 // Long Immediate 8-bit
4971 operand immL8()
4972 %{
4973 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4974 match(ConL);
4975
4976 op_cost(5);
4977 format %{ %}
4978 interface(CONST_INTER);
4979 %}
4980
4981 // Long Immediate 32-bit unsigned
4982 operand immUL32()
4983 %{
4984 predicate(n->get_long() == (unsigned int) (n->get_long()));
4985 match(ConL);
4986
4987 op_cost(10);
4988 format %{ %}
4989 interface(CONST_INTER);
4990 %}
4991
4992 // Long Immediate 32-bit signed
4993 operand immL32()
4994 %{
4995 predicate(n->get_long() == (int) (n->get_long()));
4996 match(ConL);
4997
4998 op_cost(15);
4999 format %{ %}
5000 interface(CONST_INTER);
5001 %}
5002
5003 operand immL_Pow2()
5004 %{
5005 predicate(is_power_of_2((julong)n->get_long()));
5006 match(ConL);
5007
5008 op_cost(15);
5009 format %{ %}
5010 interface(CONST_INTER);
5011 %}
5012
5013 operand immL_NotPow2()
5014 %{
5015 predicate(is_power_of_2((julong)~n->get_long()));
5016 match(ConL);
5017
5018 op_cost(15);
5019 format %{ %}
5020 interface(CONST_INTER);
5021 %}
5022
5023 // Long Immediate zero
5024 operand immL0()
5025 %{
5026 predicate(n->get_long() == 0L);
5027 match(ConL);
5028
5029 op_cost(10);
5030 format %{ %}
5031 interface(CONST_INTER);
5032 %}
5033
5034 // Constant for increment
5035 operand immL1()
5036 %{
5037 predicate(n->get_long() == 1);
5038 match(ConL);
5039
5040 format %{ %}
5041 interface(CONST_INTER);
5042 %}
5043
5044 // Constant for decrement
5045 operand immL_M1()
5046 %{
5047 predicate(n->get_long() == -1);
5048 match(ConL);
5049
5050 format %{ %}
5051 interface(CONST_INTER);
5052 %}
5053
5054 // Long Immediate: low 32-bit mask
5055 operand immL_32bits()
5056 %{
5057 predicate(n->get_long() == 0xFFFFFFFFL);
5058 match(ConL);
5059 op_cost(20);
5060
5061 format %{ %}
5062 interface(CONST_INTER);
5063 %}
5064
5065 // Int Immediate: 2^n-1, positive
5066 operand immI_Pow2M1()
5067 %{
5068 predicate((n->get_int() > 0)
5069 && is_power_of_2((juint)n->get_int() + 1));
5070 match(ConI);
5071
5072 op_cost(20);
5073 format %{ %}
5074 interface(CONST_INTER);
5075 %}
5076
5077 // Float Immediate zero
5078 operand immF0()
5079 %{
5080 predicate(jint_cast(n->getf()) == 0);
5081 match(ConF);
5082
5083 op_cost(5);
5084 format %{ %}
5085 interface(CONST_INTER);
5086 %}
5087
5088 // Float Immediate
5089 operand immF()
5090 %{
5091 match(ConF);
5092
5093 op_cost(15);
5094 format %{ %}
5095 interface(CONST_INTER);
5096 %}
5097
5098 // Half Float Immediate
5099 operand immH()
5100 %{
5101 match(ConH);
5102
5103 op_cost(15);
5104 format %{ %}
5105 interface(CONST_INTER);
5106 %}
5107
5108 // Double Immediate zero
5109 operand immD0()
5110 %{
5111 predicate(jlong_cast(n->getd()) == 0);
5112 match(ConD);
5113
5114 op_cost(5);
5115 format %{ %}
5116 interface(CONST_INTER);
5117 %}
5118
5119 // Double Immediate
5120 operand immD()
5121 %{
5122 match(ConD);
5123
5124 op_cost(15);
5125 format %{ %}
5126 interface(CONST_INTER);
5127 %}
5128
5129 // Immediates for special shifts (sign extend)
5130
// Constants for sign-extension shifts
5132 operand immI_16()
5133 %{
5134 predicate(n->get_int() == 16);
5135 match(ConI);
5136
5137 format %{ %}
5138 interface(CONST_INTER);
5139 %}
5140
5141 operand immI_24()
5142 %{
5143 predicate(n->get_int() == 24);
5144 match(ConI);
5145
5146 format %{ %}
5147 interface(CONST_INTER);
5148 %}
5149
5150 // Constant for byte-wide masking
5151 operand immI_255()
5152 %{
5153 predicate(n->get_int() == 255);
5154 match(ConI);
5155
5156 format %{ %}
5157 interface(CONST_INTER);
5158 %}
5159
5160 // Constant for short-wide masking
5161 operand immI_65535()
5162 %{
5163 predicate(n->get_int() == 65535);
5164 match(ConI);
5165
5166 format %{ %}
5167 interface(CONST_INTER);
5168 %}
5169
5170 // Constant for byte-wide masking
5171 operand immL_255()
5172 %{
5173 predicate(n->get_long() == 255);
5174 match(ConL);
5175
5176 format %{ %}
5177 interface(CONST_INTER);
5178 %}
5179
5180 // Constant for short-wide masking
5181 operand immL_65535()
5182 %{
5183 predicate(n->get_long() == 65535);
5184 match(ConL);
5185
5186 format %{ %}
5187 interface(CONST_INTER);
5188 %}
5189
5190 // AOT Runtime Constants Address
5191 operand immAOTRuntimeConstantsAddress()
5192 %{
5193 // Check if the address is in the range of AOT Runtime Constants
5194 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5195 match(ConP);
5196
5197 op_cost(0);
5198 format %{ %}
5199 interface(CONST_INTER);
5200 %}
5201
5202 operand kReg()
5203 %{
5204 constraint(ALLOC_IN_RC(vectmask_reg));
5205 match(RegVectMask);
5206 format %{%}
5207 interface(REG_INTER);
5208 %}
5209
5210 // Register Operands
5211 // Integer Register
5212 operand rRegI()
5213 %{
5214 constraint(ALLOC_IN_RC(int_reg));
5215 match(RegI);
5216
5217 match(rax_RegI);
5218 match(rbx_RegI);
5219 match(rcx_RegI);
5220 match(rdx_RegI);
5221 match(rdi_RegI);
5222
5223 format %{ %}
5224 interface(REG_INTER);
5225 %}
5226
5227 // Special Registers
5228 operand rax_RegI()
5229 %{
5230 constraint(ALLOC_IN_RC(int_rax_reg));
5231 match(RegI);
5232 match(rRegI);
5233
5234 format %{ "RAX" %}
5235 interface(REG_INTER);
5236 %}
5237
5238 // Special Registers
5239 operand rbx_RegI()
5240 %{
5241 constraint(ALLOC_IN_RC(int_rbx_reg));
5242 match(RegI);
5243 match(rRegI);
5244
5245 format %{ "RBX" %}
5246 interface(REG_INTER);
5247 %}
5248
5249 operand rcx_RegI()
5250 %{
5251 constraint(ALLOC_IN_RC(int_rcx_reg));
5252 match(RegI);
5253 match(rRegI);
5254
5255 format %{ "RCX" %}
5256 interface(REG_INTER);
5257 %}
5258
5259 operand rdx_RegI()
5260 %{
5261 constraint(ALLOC_IN_RC(int_rdx_reg));
5262 match(RegI);
5263 match(rRegI);
5264
5265 format %{ "RDX" %}
5266 interface(REG_INTER);
5267 %}
5268
5269 operand rdi_RegI()
5270 %{
5271 constraint(ALLOC_IN_RC(int_rdi_reg));
5272 match(RegI);
5273 match(rRegI);
5274
5275 format %{ "RDI" %}
5276 interface(REG_INTER);
5277 %}
5278
5279 operand no_rax_rdx_RegI()
5280 %{
5281 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5282 match(RegI);
5283 match(rbx_RegI);
5284 match(rcx_RegI);
5285 match(rdi_RegI);
5286
5287 format %{ %}
5288 interface(REG_INTER);
5289 %}
5290
5291 operand no_rbp_r13_RegI()
5292 %{
5293 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5294 match(RegI);
5295 match(rRegI);
5296 match(rax_RegI);
5297 match(rbx_RegI);
5298 match(rcx_RegI);
5299 match(rdx_RegI);
5300 match(rdi_RegI);
5301
5302 format %{ %}
5303 interface(REG_INTER);
5304 %}
5305
5306 // Pointer Register
5307 operand any_RegP()
5308 %{
5309 constraint(ALLOC_IN_RC(any_reg));
5310 match(RegP);
5311 match(rax_RegP);
5312 match(rbx_RegP);
5313 match(rdi_RegP);
5314 match(rsi_RegP);
5315 match(rbp_RegP);
5316 match(r15_RegP);
5317 match(rRegP);
5318
5319 format %{ %}
5320 interface(REG_INTER);
5321 %}
5322
5323 operand rRegP()
5324 %{
5325 constraint(ALLOC_IN_RC(ptr_reg));
5326 match(RegP);
5327 match(rax_RegP);
5328 match(rbx_RegP);
5329 match(rdi_RegP);
5330 match(rsi_RegP);
5331 match(rbp_RegP); // See Q&A below about
5332 match(r15_RegP); // r15_RegP and rbp_RegP.
5333
5334 format %{ %}
5335 interface(REG_INTER);
5336 %}
5337
5338 operand rRegN() %{
5339 constraint(ALLOC_IN_RC(int_reg));
5340 match(RegN);
5341
5342 format %{ %}
5343 interface(REG_INTER);
5344 %}
5345
5346 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5347 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match an r15_RegP.
5349 // The output of an instruction is controlled by the allocator, which respects
5350 // register class masks, not match rules. Unless an instruction mentions
5351 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5352 // by the allocator as an input.
// The same logic applies to rbp_RegP being a match for rRegP: if PreserveFramePointer == true,
// RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.
5356
5357 // This operand is not allowed to use RBP even if
5358 // RBP is not used to hold the frame pointer.
5359 operand no_rbp_RegP()
5360 %{
5361 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5362 match(RegP);
5363 match(rbx_RegP);
5364 match(rsi_RegP);
5365 match(rdi_RegP);
5366
5367 format %{ %}
5368 interface(REG_INTER);
5369 %}
5370
5371 // Special Registers
5372 // Return a pointer value
5373 operand rax_RegP()
5374 %{
5375 constraint(ALLOC_IN_RC(ptr_rax_reg));
5376 match(RegP);
5377 match(rRegP);
5378
5379 format %{ %}
5380 interface(REG_INTER);
5381 %}
5382
5383 // Special Registers
5384 // Return a compressed pointer value
5385 operand rax_RegN()
5386 %{
5387 constraint(ALLOC_IN_RC(int_rax_reg));
5388 match(RegN);
5389 match(rRegN);
5390
5391 format %{ %}
5392 interface(REG_INTER);
5393 %}
5394
5395 // Used in AtomicAdd
5396 operand rbx_RegP()
5397 %{
5398 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5399 match(RegP);
5400 match(rRegP);
5401
5402 format %{ %}
5403 interface(REG_INTER);
5404 %}
5405
5406 operand rsi_RegP()
5407 %{
5408 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5409 match(RegP);
5410 match(rRegP);
5411
5412 format %{ %}
5413 interface(REG_INTER);
5414 %}
5415
5416 operand rbp_RegP()
5417 %{
5418 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5419 match(RegP);
5420 match(rRegP);
5421
5422 format %{ %}
5423 interface(REG_INTER);
5424 %}
5425
5426 // Used in rep stosq
5427 operand rdi_RegP()
5428 %{
5429 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5430 match(RegP);
5431 match(rRegP);
5432
5433 format %{ %}
5434 interface(REG_INTER);
5435 %}
5436
5437 operand r15_RegP()
5438 %{
5439 constraint(ALLOC_IN_RC(ptr_r15_reg));
5440 match(RegP);
5441 match(rRegP);
5442
5443 format %{ %}
5444 interface(REG_INTER);
5445 %}
5446
5447 operand rRegL()
5448 %{
5449 constraint(ALLOC_IN_RC(long_reg));
5450 match(RegL);
5451 match(rax_RegL);
5452 match(rdx_RegL);
5453
5454 format %{ %}
5455 interface(REG_INTER);
5456 %}
5457
5458 // Special Registers
5459 operand no_rax_rdx_RegL()
5460 %{
5461 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5462 match(RegL);
5463 match(rRegL);
5464
5465 format %{ %}
5466 interface(REG_INTER);
5467 %}
5468
5469 operand rax_RegL()
5470 %{
5471 constraint(ALLOC_IN_RC(long_rax_reg));
5472 match(RegL);
5473 match(rRegL);
5474
5475 format %{ "RAX" %}
5476 interface(REG_INTER);
5477 %}
5478
5479 operand rcx_RegL()
5480 %{
5481 constraint(ALLOC_IN_RC(long_rcx_reg));
5482 match(RegL);
5483 match(rRegL);
5484
5485 format %{ %}
5486 interface(REG_INTER);
5487 %}
5488
5489 operand rdx_RegL()
5490 %{
5491 constraint(ALLOC_IN_RC(long_rdx_reg));
5492 match(RegL);
5493 match(rRegL);
5494
5495 format %{ %}
5496 interface(REG_INTER);
5497 %}
5498
5499 operand r11_RegL()
5500 %{
5501 constraint(ALLOC_IN_RC(long_r11_reg));
5502 match(RegL);
5503 match(rRegL);
5504
5505 format %{ %}
5506 interface(REG_INTER);
5507 %}
5508
5509 operand no_rbp_r13_RegL()
5510 %{
5511 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5512 match(RegL);
5513 match(rRegL);
5514 match(rax_RegL);
5515 match(rcx_RegL);
5516 match(rdx_RegL);
5517
5518 format %{ %}
5519 interface(REG_INTER);
5520 %}
5521
5522 // Flags register, used as output of compare instructions
5523 operand rFlagsReg()
5524 %{
5525 constraint(ALLOC_IN_RC(int_flags));
5526 match(RegFlags);
5527
5528 format %{ "RFLAGS" %}
5529 interface(REG_INTER);
5530 %}
5531
5532 // Flags register, used as output of FLOATING POINT compare instructions
5533 operand rFlagsRegU()
5534 %{
5535 constraint(ALLOC_IN_RC(int_flags));
5536 match(RegFlags);
5537
5538 format %{ "RFLAGS_U" %}
5539 interface(REG_INTER);
5540 %}
5541
5542 operand rFlagsRegUCF() %{
5543 constraint(ALLOC_IN_RC(int_flags));
5544 match(RegFlags);
5545 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5546
5547 format %{ "RFLAGS_U_CF" %}
5548 interface(REG_INTER);
5549 %}
5550
5551 operand rFlagsRegUCFE() %{
5552 constraint(ALLOC_IN_RC(int_flags));
5553 match(RegFlags);
5554 predicate(UseAPX && VM_Version::supports_avx10_2());
5555
5556 format %{ "RFLAGS_U_CFE" %}
5557 interface(REG_INTER);
5558 %}
5559
5560 // Float register operands
5561 operand regF() %{
5562 constraint(ALLOC_IN_RC(float_reg));
5563 match(RegF);
5564
5565 format %{ %}
5566 interface(REG_INTER);
5567 %}
5568
5569 // Float register operands
5570 operand legRegF() %{
5571 constraint(ALLOC_IN_RC(float_reg_legacy));
5572 match(RegF);
5573
5574 format %{ %}
5575 interface(REG_INTER);
5576 %}
5577
5578 // Float register operands
5579 operand vlRegF() %{
5580 constraint(ALLOC_IN_RC(float_reg_vl));
5581 match(RegF);
5582
5583 format %{ %}
5584 interface(REG_INTER);
5585 %}
5586
5587 // Double register operands
5588 operand regD() %{
5589 constraint(ALLOC_IN_RC(double_reg));
5590 match(RegD);
5591
5592 format %{ %}
5593 interface(REG_INTER);
5594 %}
5595
5596 // Double register operands
5597 operand legRegD() %{
5598 constraint(ALLOC_IN_RC(double_reg_legacy));
5599 match(RegD);
5600
5601 format %{ %}
5602 interface(REG_INTER);
5603 %}
5604
5605 // Double register operands
5606 operand vlRegD() %{
5607 constraint(ALLOC_IN_RC(double_reg_vl));
5608 match(RegD);
5609
5610 format %{ %}
5611 interface(REG_INTER);
5612 %}
5613
5614 //----------Memory Operands----------------------------------------------------
5615 // Direct Memory Operand
5616 // operand direct(immP addr)
5617 // %{
5618 // match(addr);
5619
5620 // format %{ "[$addr]" %}
5621 // interface(MEMORY_INTER) %{
5622 // base(0xFFFFFFFF);
5623 // index(0x4);
5624 // scale(0x0);
5625 // disp($addr);
5626 // %}
5627 // %}
5628
5629 // Indirect Memory Operand
5630 operand indirect(any_RegP reg)
5631 %{
5632 constraint(ALLOC_IN_RC(ptr_reg));
5633 match(reg);
5634
5635 format %{ "[$reg]" %}
5636 interface(MEMORY_INTER) %{
5637 base($reg);
5638 index(0x4);
5639 scale(0x0);
5640 disp(0x0);
5641 %}
5642 %}
5643
5644 // Indirect Memory Plus Short Offset Operand
5645 operand indOffset8(any_RegP reg, immL8 off)
5646 %{
5647 constraint(ALLOC_IN_RC(ptr_reg));
5648 match(AddP reg off);
5649
5650 format %{ "[$reg + $off (8-bit)]" %}
5651 interface(MEMORY_INTER) %{
5652 base($reg);
5653 index(0x4);
5654 scale(0x0);
5655 disp($off);
5656 %}
5657 %}
5658
5659 // Indirect Memory Plus Long Offset Operand
5660 operand indOffset32(any_RegP reg, immL32 off)
5661 %{
5662 constraint(ALLOC_IN_RC(ptr_reg));
5663 match(AddP reg off);
5664
5665 format %{ "[$reg + $off (32-bit)]" %}
5666 interface(MEMORY_INTER) %{
5667 base($reg);
5668 index(0x4);
5669 scale(0x0);
5670 disp($off);
5671 %}
5672 %}
5673
5674 // Indirect Memory Plus Index Register Plus Offset Operand
5675 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5676 %{
5677 constraint(ALLOC_IN_RC(ptr_reg));
5678 match(AddP (AddP reg lreg) off);
5679
5680 op_cost(10);
5681 format %{"[$reg + $off + $lreg]" %}
5682 interface(MEMORY_INTER) %{
5683 base($reg);
5684 index($lreg);
5685 scale(0x0);
5686 disp($off);
5687 %}
5688 %}
5689
5690 // Indirect Memory Plus Index Register Plus Offset Operand
5691 operand indIndex(any_RegP reg, rRegL lreg)
5692 %{
5693 constraint(ALLOC_IN_RC(ptr_reg));
5694 match(AddP reg lreg);
5695
5696 op_cost(10);
5697 format %{"[$reg + $lreg]" %}
5698 interface(MEMORY_INTER) %{
5699 base($reg);
5700 index($lreg);
5701 scale(0x0);
5702 disp(0x0);
5703 %}
5704 %}
5705
5706 // Indirect Memory Times Scale Plus Index Register
5707 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5708 %{
5709 constraint(ALLOC_IN_RC(ptr_reg));
5710 match(AddP reg (LShiftL lreg scale));
5711
5712 op_cost(10);
5713 format %{"[$reg + $lreg << $scale]" %}
5714 interface(MEMORY_INTER) %{
5715 base($reg);
5716 index($lreg);
5717 scale($scale);
5718 disp(0x0);
5719 %}
5720 %}
5721
5722 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5723 %{
5724 constraint(ALLOC_IN_RC(ptr_reg));
5725 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5726 match(AddP reg (LShiftL (ConvI2L idx) scale));
5727
5728 op_cost(10);
5729 format %{"[$reg + pos $idx << $scale]" %}
5730 interface(MEMORY_INTER) %{
5731 base($reg);
5732 index($idx);
5733 scale($scale);
5734 disp(0x0);
5735 %}
5736 %}
5737
5738 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5739 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5740 %{
5741 constraint(ALLOC_IN_RC(ptr_reg));
5742 match(AddP (AddP reg (LShiftL lreg scale)) off);
5743
5744 op_cost(10);
5745 format %{"[$reg + $off + $lreg << $scale]" %}
5746 interface(MEMORY_INTER) %{
5747 base($reg);
5748 index($lreg);
5749 scale($scale);
5750 disp($off);
5751 %}
5752 %}
5753
5754 // Indirect Memory Plus Positive Index Register Plus Offset Operand
5755 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5756 %{
5757 constraint(ALLOC_IN_RC(ptr_reg));
5758 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5759 match(AddP (AddP reg (ConvI2L idx)) off);
5760
5761 op_cost(10);
5762 format %{"[$reg + $off + $idx]" %}
5763 interface(MEMORY_INTER) %{
5764 base($reg);
5765 index($idx);
5766 scale(0x0);
5767 disp($off);
5768 %}
5769 %}
5770
5771 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5772 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5773 %{
5774 constraint(ALLOC_IN_RC(ptr_reg));
5775 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5776 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5777
5778 op_cost(10);
5779 format %{"[$reg + $off + $idx << $scale]" %}
5780 interface(MEMORY_INTER) %{
5781 base($reg);
5782 index($idx);
5783 scale($scale);
5784 disp($off);
5785 %}
5786 %}
5787
5788 // Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without a base,
// so we can't free r12 even with CompressedOops::base() == nullptr.
5791 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5792 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5793 constraint(ALLOC_IN_RC(ptr_reg));
5794 match(AddP (DecodeN reg) off);
5795
5796 op_cost(10);
5797 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5798 interface(MEMORY_INTER) %{
5799 base(0xc); // R12
5800 index($reg);
5801 scale(0x3);
5802 disp($off);
5803 %}
5804 %}
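// R12 is reserved as the compressed-oop heap base, so the effective address
// R12 + (narrow oop << 3) + off is exactly CompressedOops::base() +
// (narrow oop << shift) + off for shift == 3, as required by the predicate above.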
5805
5806 // Indirect Memory Operand
5807 operand indirectNarrow(rRegN reg)
5808 %{
5809 predicate(CompressedOops::shift() == 0);
5810 constraint(ALLOC_IN_RC(ptr_reg));
5811 match(DecodeN reg);
5812
5813 format %{ "[$reg]" %}
5814 interface(MEMORY_INTER) %{
5815 base($reg);
5816 index(0x4);
5817 scale(0x0);
5818 disp(0x0);
5819 %}
5820 %}
5821
5822 // Indirect Memory Plus Short Offset Operand
5823 operand indOffset8Narrow(rRegN reg, immL8 off)
5824 %{
5825 predicate(CompressedOops::shift() == 0);
5826 constraint(ALLOC_IN_RC(ptr_reg));
5827 match(AddP (DecodeN reg) off);
5828
5829 format %{ "[$reg + $off (8-bit)]" %}
5830 interface(MEMORY_INTER) %{
5831 base($reg);
5832 index(0x4);
5833 scale(0x0);
5834 disp($off);
5835 %}
5836 %}
5837
5838 // Indirect Memory Plus Long Offset Operand
5839 operand indOffset32Narrow(rRegN reg, immL32 off)
5840 %{
5841 predicate(CompressedOops::shift() == 0);
5842 constraint(ALLOC_IN_RC(ptr_reg));
5843 match(AddP (DecodeN reg) off);
5844
5845 format %{ "[$reg + $off (32-bit)]" %}
5846 interface(MEMORY_INTER) %{
5847 base($reg);
5848 index(0x4);
5849 scale(0x0);
5850 disp($off);
5851 %}
5852 %}
5853
5854 // Indirect Memory Plus Index Register Plus Offset Operand
5855 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5856 %{
5857 predicate(CompressedOops::shift() == 0);
5858 constraint(ALLOC_IN_RC(ptr_reg));
5859 match(AddP (AddP (DecodeN reg) lreg) off);
5860
5861 op_cost(10);
5862 format %{"[$reg + $off + $lreg]" %}
5863 interface(MEMORY_INTER) %{
5864 base($reg);
5865 index($lreg);
5866 scale(0x0);
5867 disp($off);
5868 %}
5869 %}
5870
5871 // Indirect Memory Plus Index Register Plus Offset Operand
5872 operand indIndexNarrow(rRegN reg, rRegL lreg)
5873 %{
5874 predicate(CompressedOops::shift() == 0);
5875 constraint(ALLOC_IN_RC(ptr_reg));
5876 match(AddP (DecodeN reg) lreg);
5877
5878 op_cost(10);
5879 format %{"[$reg + $lreg]" %}
5880 interface(MEMORY_INTER) %{
5881 base($reg);
5882 index($lreg);
5883 scale(0x0);
5884 disp(0x0);
5885 %}
5886 %}
5887
5888 // Indirect Memory Times Scale Plus Index Register
5889 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5890 %{
5891 predicate(CompressedOops::shift() == 0);
5892 constraint(ALLOC_IN_RC(ptr_reg));
5893 match(AddP (DecodeN reg) (LShiftL lreg scale));
5894
5895 op_cost(10);
5896 format %{"[$reg + $lreg << $scale]" %}
5897 interface(MEMORY_INTER) %{
5898 base($reg);
5899 index($lreg);
5900 scale($scale);
5901 disp(0x0);
5902 %}
5903 %}
5904
5905 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5906 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5907 %{
5908 predicate(CompressedOops::shift() == 0);
5909 constraint(ALLOC_IN_RC(ptr_reg));
5910 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5911
5912 op_cost(10);
5913 format %{"[$reg + $off + $lreg << $scale]" %}
5914 interface(MEMORY_INTER) %{
5915 base($reg);
5916 index($lreg);
5917 scale($scale);
5918 disp($off);
5919 %}
5920 %}
5921
5922 // Indirect Memory Times Plus Positive Index Register Plus Offset Operand
5923 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5924 %{
5925 constraint(ALLOC_IN_RC(ptr_reg));
5926 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5927 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5928
5929 op_cost(10);
5930 format %{"[$reg + $off + $idx]" %}
5931 interface(MEMORY_INTER) %{
5932 base($reg);
5933 index($idx);
5934 scale(0x0);
5935 disp($off);
5936 %}
5937 %}
5938
5939 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
5940 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5941 %{
5942 constraint(ALLOC_IN_RC(ptr_reg));
5943 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5944 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5945
5946 op_cost(10);
5947 format %{"[$reg + $off + $idx << $scale]" %}
5948 interface(MEMORY_INTER) %{
5949 base($reg);
5950 index($idx);
5951 scale($scale);
5952 disp($off);
5953 %}
5954 %}
5955
5956 //----------Special Memory Operands--------------------------------------------
5957 // Stack Slot Operand - This operand is used for loading and storing temporary
5958 // values on the stack where a match requires a value to
5959 // flow through memory.
5960 operand stackSlotP(sRegP reg)
5961 %{
5962 constraint(ALLOC_IN_RC(stack_slots));
5963 // No match rule because this operand is only generated in matching
5964
5965 format %{ "[$reg]" %}
5966 interface(MEMORY_INTER) %{
5967 base(0x4); // RSP
5968 index(0x4); // No Index
5969 scale(0x0); // No Scale
5970 disp($reg); // Stack Offset
5971 %}
5972 %}
5973
5974 operand stackSlotI(sRegI reg)
5975 %{
5976 constraint(ALLOC_IN_RC(stack_slots));
5977 // No match rule because this operand is only generated in matching
5978
5979 format %{ "[$reg]" %}
5980 interface(MEMORY_INTER) %{
5981 base(0x4); // RSP
5982 index(0x4); // No Index
5983 scale(0x0); // No Scale
5984 disp($reg); // Stack Offset
5985 %}
5986 %}
5987
5988 operand stackSlotF(sRegF reg)
5989 %{
5990 constraint(ALLOC_IN_RC(stack_slots));
5991 // No match rule because this operand is only generated in matching
5992
5993 format %{ "[$reg]" %}
5994 interface(MEMORY_INTER) %{
5995 base(0x4); // RSP
5996 index(0x4); // No Index
5997 scale(0x0); // No Scale
5998 disp($reg); // Stack Offset
5999 %}
6000 %}
6001
6002 operand stackSlotD(sRegD reg)
6003 %{
6004 constraint(ALLOC_IN_RC(stack_slots));
6005 // No match rule because this operand is only generated in matching
6006
6007 format %{ "[$reg]" %}
6008 interface(MEMORY_INTER) %{
6009 base(0x4); // RSP
6010 index(0x4); // No Index
6011 scale(0x0); // No Scale
6012 disp($reg); // Stack Offset
6013 %}
6014 %}
6015 operand stackSlotL(sRegL reg)
6016 %{
6017 constraint(ALLOC_IN_RC(stack_slots));
6018 // No match rule because this operand is only generated in matching
6019
6020 format %{ "[$reg]" %}
6021 interface(MEMORY_INTER) %{
6022 base(0x4); // RSP
6023 index(0x4); // No Index
6024 scale(0x0); // No Scale
6025 disp($reg); // Stack Offset
6026 %}
6027 %}
6028
6029 //----------Conditional Branch Operands----------------------------------------
6030 // Comparison Op - This is the operation of the comparison, and is limited to
6031 // the following set of codes:
6032 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6033 //
6034 // Other attributes of the comparison, such as unsignedness, are specified
6035 // by the comparison instruction that sets a condition code flags register.
6036 // That result is represented by a flags operand whose subtype is appropriate
6037 // to the unsignedness (etc.) of the comparison.
6038 //
6039 // Later, the instruction which matches both the Comparison Op (a Bool) and
6040 // the flags (produced by the Cmp) specifies the coding of the comparison op
6041 // by matching a specific subtype of Bool operand below, such as cmpOpU.
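//
// A schematic sketch (not quoted from this file): a conditional-jump rule
// matches both the Bool and the flags and lets the operand supply the
// condition encoding, e.g.
//
//   instruct jmpCon_example(cmpOp cop, rFlagsReg cr, label labl)
//   %{
//     match(If cop cr);
//     ...
//     format %{ "j$cop  $labl" %}
//     ...
//   %}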
6042
6043 // Comparison Code
6044 operand cmpOp()
6045 %{
6046 match(Bool);
6047
6048 format %{ "" %}
6049 interface(COND_INTER) %{
6050 equal(0x4, "e");
6051 not_equal(0x5, "ne");
6052 less(0xc, "l");
6053 greater_equal(0xd, "ge");
6054 less_equal(0xe, "le");
6055 greater(0xf, "g");
6056 overflow(0x0, "o");
6057 no_overflow(0x1, "no");
6058 %}
6059 %}
6060
6061 // Comparison Code, unsigned compare. Used by FP also, with
6062 // C2 (unordered) turned into GT or LT already. The other bits
6063 // C0 and C3 are turned into Carry & Zero flags.
6064 operand cmpOpU()
6065 %{
6066 match(Bool);
6067
6068 format %{ "" %}
6069 interface(COND_INTER) %{
6070 equal(0x4, "e");
6071 not_equal(0x5, "ne");
6072 less(0x2, "b");
6073 greater_equal(0x3, "ae");
6074 less_equal(0x6, "be");
6075 greater(0x7, "a");
6076 overflow(0x0, "o");
6077 no_overflow(0x1, "no");
6078 %}
6079 %}
6080
6081
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set, so we
// don't need to use cmpOpUCF2 for eq/ne.
6085 operand cmpOpUCF() %{
6086 match(Bool);
6087 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6088 (n->as_Bool()->_test._test == BoolTest::lt ||
6089 n->as_Bool()->_test._test == BoolTest::ge ||
6090 n->as_Bool()->_test._test == BoolTest::le ||
6091 n->as_Bool()->_test._test == BoolTest::gt ||
6092 n->in(1)->in(1) == n->in(1)->in(2)));
6093 format %{ "" %}
6094 interface(COND_INTER) %{
6095 equal(0xb, "np");
6096 not_equal(0xa, "p");
6097 less(0x2, "b");
6098 greater_equal(0x3, "ae");
6099 less_equal(0x6, "be");
6100 greater(0x7, "a");
6101 overflow(0x0, "o");
6102 no_overflow(0x1, "no");
6103 %}
6104 %}
6105
6106
6107 // Floating comparisons that can be fixed up with extra conditional jumps
6108 operand cmpOpUCF2() %{
6109 match(Bool);
6110 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6111 (n->as_Bool()->_test._test == BoolTest::ne ||
6112 n->as_Bool()->_test._test == BoolTest::eq) &&
6113 n->in(1)->in(1) != n->in(1)->in(2));
6114 format %{ "" %}
6115 interface(COND_INTER) %{
6116 equal(0x4, "e");
6117 not_equal(0x5, "ne");
6118 less(0x2, "b");
6119 greater_equal(0x3, "ae");
6120 less_equal(0x6, "be");
6121 greater(0x7, "a");
6122 overflow(0x0, "o");
6123 no_overflow(0x1, "no");
6124 %}
6125 %}
6126
6127
// Floating point comparisons that set condition flags to test more directly.
6129 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6130 // are used for L (<) and LE (<=) conditions. It's important to convert these
6131 // latter conditions to ones that use unsigned tests before passing into an
6132 // instruction because the preceding comparison might be based on a three way
6133 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
6134 operand cmpOpUCFE()
6135 %{
6136 match(Bool);
6137 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6138 (n->as_Bool()->_test._test == BoolTest::ne ||
6139 n->as_Bool()->_test._test == BoolTest::eq ||
6140 n->as_Bool()->_test._test == BoolTest::lt ||
6141 n->as_Bool()->_test._test == BoolTest::ge ||
6142 n->as_Bool()->_test._test == BoolTest::le ||
6143 n->as_Bool()->_test._test == BoolTest::gt));
6144
6145 format %{ "" %}
6146 interface(COND_INTER) %{
6147 equal(0x4, "e");
6148 not_equal(0x5, "ne");
6149 less(0x2, "b");
6150 greater_equal(0x3, "ae");
6151 less_equal(0x6, "be");
6152 greater(0x7, "a");
6153 overflow(0x0, "o");
6154 no_overflow(0x1, "no");
6155 %}
6156 %}
6157
// Operands for bound floating point register arguments
6159 operand rxmm0() %{
6160 constraint(ALLOC_IN_RC(xmm0_reg));
6161 match(VecX);
6162 format%{%}
6163 interface(REG_INTER);
6164 %}
6165
6166 // Vectors
6167
6168 // Dummy generic vector class. Should be used for all vector operands.
6169 // Replaced with vec[SDXYZ] during post-selection pass.
6170 operand vec() %{
6171 constraint(ALLOC_IN_RC(dynamic));
6172 match(VecX);
6173 match(VecY);
6174 match(VecZ);
6175 match(VecS);
6176 match(VecD);
6177
6178 format %{ %}
6179 interface(REG_INTER);
6180 %}
6181
6182 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6183 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6184 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6185 // runtime code generation via reg_class_dynamic.
6186 operand legVec() %{
6187 constraint(ALLOC_IN_RC(dynamic));
6188 match(VecX);
6189 match(VecY);
6190 match(VecZ);
6191 match(VecS);
6192 match(VecD);
6193
6194 format %{ %}
6195 interface(REG_INTER);
6196 %}
6197
6198 // Replaces vec during post-selection cleanup. See above.
6199 operand vecS() %{
6200 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6201 match(VecS);
6202
6203 format %{ %}
6204 interface(REG_INTER);
6205 %}
6206
6207 // Replaces legVec during post-selection cleanup. See above.
6208 operand legVecS() %{
6209 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6210 match(VecS);
6211
6212 format %{ %}
6213 interface(REG_INTER);
6214 %}
6215
6216 // Replaces vec during post-selection cleanup. See above.
6217 operand vecD() %{
6218 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6219 match(VecD);
6220
6221 format %{ %}
6222 interface(REG_INTER);
6223 %}
6224
6225 // Replaces legVec during post-selection cleanup. See above.
6226 operand legVecD() %{
6227 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6228 match(VecD);
6229
6230 format %{ %}
6231 interface(REG_INTER);
6232 %}
6233
6234 // Replaces vec during post-selection cleanup. See above.
6235 operand vecX() %{
6236 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6237 match(VecX);
6238
6239 format %{ %}
6240 interface(REG_INTER);
6241 %}
6242
6243 // Replaces legVec during post-selection cleanup. See above.
6244 operand legVecX() %{
6245 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6246 match(VecX);
6247
6248 format %{ %}
6249 interface(REG_INTER);
6250 %}
6251
6252 // Replaces vec during post-selection cleanup. See above.
6253 operand vecY() %{
6254 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6255 match(VecY);
6256
6257 format %{ %}
6258 interface(REG_INTER);
6259 %}
6260
6261 // Replaces legVec during post-selection cleanup. See above.
6262 operand legVecY() %{
6263 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6264 match(VecY);
6265
6266 format %{ %}
6267 interface(REG_INTER);
6268 %}
6269
6270 // Replaces vec during post-selection cleanup. See above.
6271 operand vecZ() %{
6272 constraint(ALLOC_IN_RC(vectorz_reg));
6273 match(VecZ);
6274
6275 format %{ %}
6276 interface(REG_INTER);
6277 %}
6278
6279 // Replaces legVec during post-selection cleanup. See above.
6280 operand legVecZ() %{
6281 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6282 match(VecZ);
6283
6284 format %{ %}
6285 interface(REG_INTER);
6286 %}
6287
6288 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
6290 // instruction definitions by not requiring the AD writer to specify separate
6291 // instructions for every form of operand when the instruction accepts
6292 // multiple operand types with the same basic encoding and format. The classic
6293 // case of this is memory operands.
6294
6295 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6296 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6297 indCompressedOopOffset,
6298 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6299 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6300 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
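// An instruction that declares an operand of class 'memory' matches any of the
// addressing forms listed above, so a single load/store rule covers [reg],
// [reg + off], [reg + idx << scale + off] and the narrow-oop variants.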
6301
6302 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
6304 pipeline %{
6305
6306 //----------ATTRIBUTES---------------------------------------------------------
6307 attributes %{
variable_size_instructions; // Variable size instructions
6309 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
instruction_unit_size = 1; // An instruction is 1 byte long
6311 instruction_fetch_unit_size = 16; // The processor fetches one line
6312 instruction_fetch_units = 1; // of 16 bytes
6313 %}
6314
6315 //----------RESOURCES----------------------------------------------------------
6316 // Resources are the functional units available to the machine
6317
6318 // Generic P2/P3 pipeline
6319 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6320 // 3 instructions decoded per cycle.
6321 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU ops, only ALU0 handles mul instructions.
6323 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6324 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6325 BR, FPU,
6326 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6327
6328 //----------PIPELINE DESCRIPTION-----------------------------------------------
6329 // Pipeline Description specifies the stages in the machine's pipeline
6330
6331 // Generic P2/P3 pipeline
6332 pipe_desc(S0, S1, S2, S3, S4, S5);
6333
6334 //----------PIPELINE CLASSES---------------------------------------------------
6335 // Pipeline Classes describe the stages in which input and output are
6336 // referenced by the hardware pipeline.
6337
6338 // Naming convention: ialu or fpu
6339 // Then: _reg
6340 // Then: _reg if there is a 2nd register
6341 // Then: _long if it's a pair of instructions implementing a long
6342 // Then: _fat if it requires the big decoder
6343 // Or: _mem if it requires the big decoder and a memory unit.
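// For example, ialu_reg_mem below is an integer op with a register destination
// and a memory source: it needs the big decoder (D0) plus a memory unit (MEM).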
6344
6345 // Integer ALU reg operation
6346 pipe_class ialu_reg(rRegI dst)
6347 %{
6348 single_instruction;
6349 dst : S4(write);
6350 dst : S3(read);
6351 DECODE : S0; // any decoder
6352 ALU : S3; // any alu
6353 %}
6354
6355 // Long ALU reg operation
6356 pipe_class ialu_reg_long(rRegL dst)
6357 %{
6358 instruction_count(2);
6359 dst : S4(write);
6360 dst : S3(read);
6361 DECODE : S0(2); // any 2 decoders
6362 ALU : S3(2); // both alus
6363 %}
6364
6365 // Integer ALU reg operation using big decoder
6366 pipe_class ialu_reg_fat(rRegI dst)
6367 %{
6368 single_instruction;
6369 dst : S4(write);
6370 dst : S3(read);
6371 D0 : S0; // big decoder only
6372 ALU : S3; // any alu
6373 %}
6374
6375 // Integer ALU reg-reg operation
6376 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6377 %{
6378 single_instruction;
6379 dst : S4(write);
6380 src : S3(read);
6381 DECODE : S0; // any decoder
6382 ALU : S3; // any alu
6383 %}
6384
// Integer ALU reg-reg operation using big decoder
6386 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6387 %{
6388 single_instruction;
6389 dst : S4(write);
6390 src : S3(read);
6391 D0 : S0; // big decoder only
6392 ALU : S3; // any alu
6393 %}
6394
6395 // Integer ALU reg-mem operation
6396 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6397 %{
6398 single_instruction;
6399 dst : S5(write);
6400 mem : S3(read);
6401 D0 : S0; // big decoder only
6402 ALU : S4; // any alu
6403 MEM : S3; // any mem
6404 %}
6405
6406 // Integer mem operation (prefetch)
6407 pipe_class ialu_mem(memory mem)
6408 %{
6409 single_instruction;
6410 mem : S3(read);
6411 D0 : S0; // big decoder only
6412 MEM : S3; // any mem
6413 %}
6414
6415 // Integer Store to Memory
6416 pipe_class ialu_mem_reg(memory mem, rRegI src)
6417 %{
6418 single_instruction;
6419 mem : S3(read);
6420 src : S5(read);
6421 D0 : S0; // big decoder only
6422 ALU : S4; // any alu
6423 MEM : S3;
6424 %}
6425
6426 // // Long Store to Memory
6427 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6428 // %{
6429 // instruction_count(2);
6430 // mem : S3(read);
6431 // src : S5(read);
6432 // D0 : S0(2); // big decoder only; twice
6433 // ALU : S4(2); // any 2 alus
6434 // MEM : S3(2); // Both mems
6435 // %}
6436
6437 // Integer Store to Memory
6438 pipe_class ialu_mem_imm(memory mem)
6439 %{
6440 single_instruction;
6441 mem : S3(read);
6442 D0 : S0; // big decoder only
6443 ALU : S4; // any alu
6444 MEM : S3;
6445 %}
6446
6447 // Integer ALU0 reg-reg operation
6448 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
6449 %{
6450 single_instruction;
6451 dst : S4(write);
6452 src : S3(read);
6453 D0 : S0; // Big decoder only
6454 ALU0 : S3; // only alu0
6455 %}
6456
6457 // Integer ALU0 reg-mem operation
6458 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
6459 %{
6460 single_instruction;
6461 dst : S5(write);
6462 mem : S3(read);
6463 D0 : S0; // big decoder only
6464 ALU0 : S4; // ALU0 only
6465 MEM : S3; // any mem
6466 %}
6467
6468 // Integer ALU reg-reg operation
6469 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
6470 %{
6471 single_instruction;
6472 cr : S4(write);
6473 src1 : S3(read);
6474 src2 : S3(read);
6475 DECODE : S0; // any decoder
6476 ALU : S3; // any alu
6477 %}
6478
6479 // Integer ALU reg-imm operation
6480 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
6481 %{
6482 single_instruction;
6483 cr : S4(write);
6484 src1 : S3(read);
6485 DECODE : S0; // any decoder
6486 ALU : S3; // any alu
6487 %}
6488
6489 // Integer ALU reg-mem operation
6490 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
6491 %{
6492 single_instruction;
6493 cr : S4(write);
6494 src1 : S3(read);
6495 src2 : S3(read);
6496 D0 : S0; // big decoder only
6497 ALU : S4; // any alu
6498 MEM : S3;
6499 %}
6500
6501 // Conditional move reg-reg
6502 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
6503 %{
6504 instruction_count(4);
6505 y : S4(read);
6506 q : S3(read);
6507 p : S3(read);
6508 DECODE : S0(4); // any decoder
6509 %}
6510
6511 // Conditional move reg-reg
6512 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
6513 %{
6514 single_instruction;
6515 dst : S4(write);
6516 src : S3(read);
6517 cr : S3(read);
6518 DECODE : S0; // any decoder
6519 %}
6520
6521 // Conditional move reg-mem
6522 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
6523 %{
6524 single_instruction;
6525 dst : S4(write);
6526 src : S3(read);
6527 cr : S3(read);
6528 DECODE : S0; // any decoder
6529 MEM : S3;
6530 %}
6531
6532 // Conditional move reg-reg long
6533 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
6534 %{
6535 single_instruction;
6536 dst : S4(write);
6537 src : S3(read);
6538 cr : S3(read);
6539 DECODE : S0(2); // any 2 decoders
6540 %}
6541
6542 // Float reg-reg operation
6543 pipe_class fpu_reg(regD dst)
6544 %{
6545 instruction_count(2);
6546 dst : S3(read);
6547 DECODE : S0(2); // any 2 decoders
6548 FPU : S3;
6549 %}
6550
6551 // Float reg-reg operation
6552 pipe_class fpu_reg_reg(regD dst, regD src)
6553 %{
6554 instruction_count(2);
6555 dst : S4(write);
6556 src : S3(read);
6557 DECODE : S0(2); // any 2 decoders
6558 FPU : S3;
6559 %}
6560
6561 // Float reg-reg operation
6562 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
6563 %{
6564 instruction_count(3);
6565 dst : S4(write);
6566 src1 : S3(read);
6567 src2 : S3(read);
6568 DECODE : S0(3); // any 3 decoders
6569 FPU : S3(2);
6570 %}
6571
6572 // Float reg-reg operation
6573 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
6574 %{
6575 instruction_count(4);
6576 dst : S4(write);
6577 src1 : S3(read);
6578 src2 : S3(read);
6579 src3 : S3(read);
6580   DECODE : S0(4); // any 4 decoders
6581 FPU : S3(2);
6582 %}
6583
6584 // Float reg-reg operation
6585 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
6586 %{
6587 instruction_count(4);
6588 dst : S4(write);
6589 src1 : S3(read);
6590 src2 : S3(read);
6591 src3 : S3(read);
6592 DECODE : S1(3); // any 3 decoders
6593 D0 : S0; // Big decoder only
6594 FPU : S3(2);
6595 MEM : S3;
6596 %}
6597
6598 // Float reg-mem operation
6599 pipe_class fpu_reg_mem(regD dst, memory mem)
6600 %{
6601 instruction_count(2);
6602 dst : S5(write);
6603 mem : S3(read);
6604 D0 : S0; // big decoder only
6605 DECODE : S1; // any decoder for FPU POP
6606 FPU : S4;
6607 MEM : S3; // any mem
6608 %}
6609
6610 // Float reg-mem operation
6611 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
6612 %{
6613 instruction_count(3);
6614 dst : S5(write);
6615 src1 : S3(read);
6616 mem : S3(read);
6617 D0 : S0; // big decoder only
6618 DECODE : S1(2); // any decoder for FPU POP
6619 FPU : S4;
6620 MEM : S3; // any mem
6621 %}
6622
6623 // Float mem-reg operation
6624 pipe_class fpu_mem_reg(memory mem, regD src)
6625 %{
6626 instruction_count(2);
6627 src : S5(read);
6628 mem : S3(read);
6629 DECODE : S0; // any decoder for FPU PUSH
6630 D0 : S1; // big decoder only
6631 FPU : S4;
6632 MEM : S3; // any mem
6633 %}
6634
6635 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
6636 %{
6637 instruction_count(3);
6638 src1 : S3(read);
6639 src2 : S3(read);
6640 mem : S3(read);
6641 DECODE : S0(2); // any decoder for FPU PUSH
6642 D0 : S1; // big decoder only
6643 FPU : S4;
6644 MEM : S3; // any mem
6645 %}
6646
6647 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
6648 %{
6649 instruction_count(3);
6650 src1 : S3(read);
6651 src2 : S3(read);
6652 mem : S4(read);
6653 DECODE : S0; // any decoder for FPU PUSH
6654 D0 : S0(2); // big decoder only
6655 FPU : S4;
6656 MEM : S3(2); // any mem
6657 %}
6658
6659 pipe_class fpu_mem_mem(memory dst, memory src1)
6660 %{
6661 instruction_count(2);
6662 src1 : S3(read);
6663 dst : S4(read);
6664 D0 : S0(2); // big decoder only
6665 MEM : S3(2); // any mem
6666 %}
6667
6668 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
6669 %{
6670 instruction_count(3);
6671 src1 : S3(read);
6672 src2 : S3(read);
6673 dst : S4(read);
6674 D0 : S0(3); // big decoder only
6675 FPU : S4;
6676 MEM : S3(3); // any mem
6677 %}
6678
6679 pipe_class fpu_mem_reg_con(memory mem, regD src1)
6680 %{
6681 instruction_count(3);
6682 src1 : S4(read);
6683 mem : S4(read);
6684 DECODE : S0; // any decoder for FPU PUSH
6685 D0 : S0(2); // big decoder only
6686 FPU : S4;
6687 MEM : S3(2); // any mem
6688 %}
6689
6690 // Float load constant
6691 pipe_class fpu_reg_con(regD dst)
6692 %{
6693 instruction_count(2);
6694 dst : S5(write);
6695 D0 : S0; // big decoder only for the load
6696 DECODE : S1; // any decoder for FPU POP
6697 FPU : S4;
6698 MEM : S3; // any mem
6699 %}
6700
6701 // Float load constant
6702 pipe_class fpu_reg_reg_con(regD dst, regD src)
6703 %{
6704 instruction_count(3);
6705 dst : S5(write);
6706 src : S3(read);
6707 D0 : S0; // big decoder only for the load
6708 DECODE : S1(2); // any decoder for FPU POP
6709 FPU : S4;
6710 MEM : S3; // any mem
6711 %}
6712
6713 // Unconditional branch
6714 pipe_class pipe_jmp(label labl)
6715 %{
6716 single_instruction;
6717 BR : S3;
6718 %}
6719
6720 // Conditional branch
6721 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
6722 %{
6723 single_instruction;
6724 cr : S1(read);
6725 BR : S3;
6726 %}
6727
6728 // Allocation idiom
6729 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
6730 %{
6731 instruction_count(1); force_serialization;
6732 fixed_latency(6);
6733 heap_ptr : S3(read);
6734 DECODE : S0(3);
6735 D0 : S2;
6736 MEM : S3;
6737 ALU : S3(2);
6738 dst : S5(write);
6739 BR : S5;
6740 %}
6741
6742 // Generic big/slow expanded idiom
6743 pipe_class pipe_slow()
6744 %{
6745 instruction_count(10); multiple_bundles; force_serialization;
6746 fixed_latency(100);
6747 D0 : S0(2);
6748 MEM : S3(2);
6749 %}
6750
6751 // The real do-nothing guy
6752 pipe_class empty()
6753 %{
6754 instruction_count(0);
6755 %}
6756
6757 // Define the class for the Nop node
6758 define
6759 %{
6760 MachNop = empty;
6761 %}
6762
6763 %}
6764
6765 //----------INSTRUCTIONS-------------------------------------------------------
6766 //
6767 // match -- States which machine-independent subtree may be replaced
6768 // by this instruction.
6769 // ins_cost -- The estimated cost of this instruction is used by instruction
6770 // selection to identify a minimum cost tree of machine
6771 // instructions that matches a tree of machine-independent
6772 // instructions.
6773 // format -- A string providing the disassembly for this instruction.
6774 // The value of an instruction's operand may be inserted
6775 // by referring to it with a '$' prefix.
6776 // opcode -- Three instruction opcodes may be provided. These are referred
6777 // to within an encode class as $primary, $secondary, and $tertiary
6778 //              respectively. The primary opcode is commonly used to
6779 // indicate the type of machine instruction, while secondary
6780 // and tertiary are often used for prefix options or addressing
6781 // modes.
6782 // ins_encode -- A list of encode classes with parameters. The encode class
6783 // name must have been defined in an 'enc_class' specification
6784 // in the encode section of the architecture description.
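//
// As an illustrative sketch only (the rule name below is hypothetical; the
// operand and pipeline classes are assumed to be the ones defined elsewhere
// in this file), a minimal reg-reg integer add rule assembled from these
// pieces could look like:
//
//   instruct addI_rReg_sketch(rRegI dst, rRegI src, rFlagsReg cr) %{
//     match(Set dst (AddI dst src));     // ideal subtree this rule replaces
//     effect(KILL cr);                   // addl clobbers the condition flags
//     ins_cost(150);                     // estimated cost used during selection
//     format %{ "addl    $dst, $src" %}  // disassembly string
//     ins_encode %{ __ addl($dst$$Register, $src$$Register); %}
//     ins_pipe(ialu_reg_reg);            // pipeline class used for scheduling
//   %}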
6785
6786 // ============================================================================
6787
6788 instruct ShouldNotReachHere() %{
6789 match(Halt);
6790 format %{ "stop\t# ShouldNotReachHere" %}
6791 ins_encode %{
6792 if (is_reachable()) {
6793 const char* str = __ code_string(_halt_reason);
6794 __ stop(str);
6795 }
6796 %}
6797 ins_pipe(pipe_slow);
6798 %}
6799
6800 // ============================================================================
6801
6802 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
6803 // Load Float
6804 instruct MoveF2VL(vlRegF dst, regF src) %{
6805 match(Set dst src);
6806 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6807 ins_encode %{
6808 ShouldNotReachHere();
6809 %}
6810 ins_pipe( fpu_reg_reg );
6811 %}
6812
6813 // Load Float
6814 instruct MoveF2LEG(legRegF dst, regF src) %{
6815 match(Set dst src);
6816 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6817 ins_encode %{
6818 ShouldNotReachHere();
6819 %}
6820 ins_pipe( fpu_reg_reg );
6821 %}
6822
6823 // Load Float
6824 instruct MoveVL2F(regF dst, vlRegF src) %{
6825 match(Set dst src);
6826 format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6827 ins_encode %{
6828 ShouldNotReachHere();
6829 %}
6830 ins_pipe( fpu_reg_reg );
6831 %}
6832
6833 // Load Float
6834 instruct MoveLEG2F(regF dst, legRegF src) %{
6835 match(Set dst src);
6836 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
6837 ins_encode %{
6838 ShouldNotReachHere();
6839 %}
6840 ins_pipe( fpu_reg_reg );
6841 %}
6842
6843 // Load Double
6844 instruct MoveD2VL(vlRegD dst, regD src) %{
6845 match(Set dst src);
6846 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6847 ins_encode %{
6848 ShouldNotReachHere();
6849 %}
6850 ins_pipe( fpu_reg_reg );
6851 %}
6852
6853 // Load Double
6854 instruct MoveD2LEG(legRegD dst, regD src) %{
6855 match(Set dst src);
6856 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6857 ins_encode %{
6858 ShouldNotReachHere();
6859 %}
6860 ins_pipe( fpu_reg_reg );
6861 %}
6862
6863 // Load Double
6864 instruct MoveVL2D(regD dst, vlRegD src) %{
6865 match(Set dst src);
6866 format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6867 ins_encode %{
6868 ShouldNotReachHere();
6869 %}
6870 ins_pipe( fpu_reg_reg );
6871 %}
6872
6873 // Load Double
6874 instruct MoveLEG2D(regD dst, legRegD src) %{
6875 match(Set dst src);
6876 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
6877 ins_encode %{
6878 ShouldNotReachHere();
6879 %}
6880 ins_pipe( fpu_reg_reg );
6881 %}
6882
6883 //----------Load/Store/Move Instructions---------------------------------------
6884 //----------Load Instructions--------------------------------------------------
6885
6886 // Load Byte (8 bit signed)
6887 instruct loadB(rRegI dst, memory mem)
6888 %{
6889 match(Set dst (LoadB mem));
6890
6891 ins_cost(125);
6892 format %{ "movsbl $dst, $mem\t# byte" %}
6893
6894 ins_encode %{
6895 __ movsbl($dst$$Register, $mem$$Address);
6896 %}
6897
6898 ins_pipe(ialu_reg_mem);
6899 %}
6900
6901 // Load Byte (8 bit signed) into Long Register
6902 instruct loadB2L(rRegL dst, memory mem)
6903 %{
6904 match(Set dst (ConvI2L (LoadB mem)));
6905
6906 ins_cost(125);
6907 format %{ "movsbq $dst, $mem\t# byte -> long" %}
6908
6909 ins_encode %{
6910 __ movsbq($dst$$Register, $mem$$Address);
6911 %}
6912
6913 ins_pipe(ialu_reg_mem);
6914 %}
6915
6916 // Load Unsigned Byte (8 bit UNsigned)
6917 instruct loadUB(rRegI dst, memory mem)
6918 %{
6919 match(Set dst (LoadUB mem));
6920
6921 ins_cost(125);
6922 format %{ "movzbl $dst, $mem\t# ubyte" %}
6923
6924 ins_encode %{
6925 __ movzbl($dst$$Register, $mem$$Address);
6926 %}
6927
6928 ins_pipe(ialu_reg_mem);
6929 %}
6930
6931 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6932 instruct loadUB2L(rRegL dst, memory mem)
6933 %{
6934 match(Set dst (ConvI2L (LoadUB mem)));
6935
6936 ins_cost(125);
6937 format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
6938
6939 ins_encode %{
6940 __ movzbq($dst$$Register, $mem$$Address);
6941 %}
6942
6943 ins_pipe(ialu_reg_mem);
6944 %}
6945
6946 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
6947 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
6948 match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6949 effect(KILL cr);
6950
6951 format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
6952 "andl $dst, right_n_bits($mask, 8)" %}
6953 ins_encode %{
6954 Register Rdst = $dst$$Register;
6955 __ movzbq(Rdst, $mem$$Address);
6956 __ andl(Rdst, $mask$$constant & right_n_bits(8));
6957 %}
6958 ins_pipe(ialu_reg_mem);
6959 %}
6960
6961 // Load Short (16 bit signed)
6962 instruct loadS(rRegI dst, memory mem)
6963 %{
6964 match(Set dst (LoadS mem));
6965
6966 ins_cost(125);
6967 format %{ "movswl $dst, $mem\t# short" %}
6968
6969 ins_encode %{
6970 __ movswl($dst$$Register, $mem$$Address);
6971 %}
6972
6973 ins_pipe(ialu_reg_mem);
6974 %}
6975
6976 // Load Short (16 bit signed) to Byte (8 bit signed)
6977 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
6978 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6979
6980 ins_cost(125);
6981 format %{ "movsbl $dst, $mem\t# short -> byte" %}
6982 ins_encode %{
6983 __ movsbl($dst$$Register, $mem$$Address);
6984 %}
6985 ins_pipe(ialu_reg_mem);
6986 %}
6987
6988 // Load Short (16 bit signed) into Long Register
6989 instruct loadS2L(rRegL dst, memory mem)
6990 %{
6991 match(Set dst (ConvI2L (LoadS mem)));
6992
6993 ins_cost(125);
6994 format %{ "movswq $dst, $mem\t# short -> long" %}
6995
6996 ins_encode %{
6997 __ movswq($dst$$Register, $mem$$Address);
6998 %}
6999
7000 ins_pipe(ialu_reg_mem);
7001 %}
7002
7003 // Load Unsigned Short/Char (16 bit UNsigned)
7004 instruct loadUS(rRegI dst, memory mem)
7005 %{
7006 match(Set dst (LoadUS mem));
7007
7008 ins_cost(125);
7009 format %{ "movzwl $dst, $mem\t# ushort/char" %}
7010
7011 ins_encode %{
7012 __ movzwl($dst$$Register, $mem$$Address);
7013 %}
7014
7015 ins_pipe(ialu_reg_mem);
7016 %}
7017
7018 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
7019 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7020 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
7021
7022 ins_cost(125);
7023 format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
7024 ins_encode %{
7025 __ movsbl($dst$$Register, $mem$$Address);
7026 %}
7027 ins_pipe(ialu_reg_mem);
7028 %}
7029
7030 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
7031 instruct loadUS2L(rRegL dst, memory mem)
7032 %{
7033 match(Set dst (ConvI2L (LoadUS mem)));
7034
7035 ins_cost(125);
7036 format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
7037
7038 ins_encode %{
7039 __ movzwq($dst$$Register, $mem$$Address);
7040 %}
7041
7042 ins_pipe(ialu_reg_mem);
7043 %}
7044
7045 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
7046 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7047 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7048
7049 format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
7050 ins_encode %{
7051 __ movzbq($dst$$Register, $mem$$Address);
7052 %}
7053 ins_pipe(ialu_reg_mem);
7054 %}
7055
7056 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
7057 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
7058 match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
7059 effect(KILL cr);
7060
7061 format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
7062 "andl $dst, right_n_bits($mask, 16)" %}
7063 ins_encode %{
7064 Register Rdst = $dst$$Register;
7065 __ movzwq(Rdst, $mem$$Address);
7066 __ andl(Rdst, $mask$$constant & right_n_bits(16));
7067 %}
7068 ins_pipe(ialu_reg_mem);
7069 %}
7070
7071 // Load Integer
7072 instruct loadI(rRegI dst, memory mem)
7073 %{
7074 match(Set dst (LoadI mem));
7075
7076 ins_cost(125);
7077 format %{ "movl $dst, $mem\t# int" %}
7078
7079 ins_encode %{
7080 __ movl($dst$$Register, $mem$$Address);
7081 %}
7082
7083 ins_pipe(ialu_reg_mem);
7084 %}
7085
7086 // Load Integer (32 bit signed) to Byte (8 bit signed)
7087 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
7088 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
7089
7090 ins_cost(125);
7091 format %{ "movsbl $dst, $mem\t# int -> byte" %}
7092 ins_encode %{
7093 __ movsbl($dst$$Register, $mem$$Address);
7094 %}
7095 ins_pipe(ialu_reg_mem);
7096 %}
7097
7098 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
7099 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
7100 match(Set dst (AndI (LoadI mem) mask));
7101
7102 ins_cost(125);
7103 format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
7104 ins_encode %{
7105 __ movzbl($dst$$Register, $mem$$Address);
7106 %}
7107 ins_pipe(ialu_reg_mem);
7108 %}
7109
7110 // Load Integer (32 bit signed) to Short (16 bit signed)
7111 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
7112 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
7113
7114 ins_cost(125);
7115 format %{ "movswl $dst, $mem\t# int -> short" %}
7116 ins_encode %{
7117 __ movswl($dst$$Register, $mem$$Address);
7118 %}
7119 ins_pipe(ialu_reg_mem);
7120 %}
7121
7122 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
7123 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
7124 match(Set dst (AndI (LoadI mem) mask));
7125
7126 ins_cost(125);
7127 format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
7128 ins_encode %{
7129 __ movzwl($dst$$Register, $mem$$Address);
7130 %}
7131 ins_pipe(ialu_reg_mem);
7132 %}
7133
7134 // Load Integer into Long Register
7135 instruct loadI2L(rRegL dst, memory mem)
7136 %{
7137 match(Set dst (ConvI2L (LoadI mem)));
7138
7139 ins_cost(125);
7140 format %{ "movslq $dst, $mem\t# int -> long" %}
7141
7142 ins_encode %{
7143 __ movslq($dst$$Register, $mem$$Address);
7144 %}
7145
7146 ins_pipe(ialu_reg_mem);
7147 %}
7148
7149 // Load Integer with mask 0xFF into Long Register
7150 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
7151 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7152
7153 format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
7154 ins_encode %{
7155 __ movzbq($dst$$Register, $mem$$Address);
7156 %}
7157 ins_pipe(ialu_reg_mem);
7158 %}
7159
7160 // Load Integer with mask 0xFFFF into Long Register
7161 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
7162 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7163
7164 format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
7165 ins_encode %{
7166 __ movzwq($dst$$Register, $mem$$Address);
7167 %}
7168 ins_pipe(ialu_reg_mem);
7169 %}
7170
7171 // Load Integer with a 31-bit mask into Long Register
7172 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
7173 match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
7174 effect(KILL cr);
7175
7176 format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
7177 "andl $dst, $mask" %}
7178 ins_encode %{
7179 Register Rdst = $dst$$Register;
7180 __ movl(Rdst, $mem$$Address);
7181 __ andl(Rdst, $mask$$constant);
7182 %}
7183 ins_pipe(ialu_reg_mem);
7184 %}
7185
7186 // Load Unsigned Integer into Long Register
7187 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
7188 %{
7189 match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
7190
7191 ins_cost(125);
7192 format %{ "movl $dst, $mem\t# uint -> long" %}
7193
7194 ins_encode %{
7195 __ movl($dst$$Register, $mem$$Address);
7196 %}
7197
7198 ins_pipe(ialu_reg_mem);
7199 %}
7200
7201 // Load Long
7202 instruct loadL(rRegL dst, memory mem)
7203 %{
7204 match(Set dst (LoadL mem));
7205
7206 ins_cost(125);
7207 format %{ "movq $dst, $mem\t# long" %}
7208
7209 ins_encode %{
7210 __ movq($dst$$Register, $mem$$Address);
7211 %}
7212
7213 ins_pipe(ialu_reg_mem); // XXX
7214 %}
7215
7216 // Load Range
7217 instruct loadRange(rRegI dst, memory mem)
7218 %{
7219 match(Set dst (LoadRange mem));
7220
7221 ins_cost(125); // XXX
7222 format %{ "movl $dst, $mem\t# range" %}
7223 ins_encode %{
7224 __ movl($dst$$Register, $mem$$Address);
7225 %}
7226 ins_pipe(ialu_reg_mem);
7227 %}
7228
7229 // Load Pointer
7230 instruct loadP(rRegP dst, memory mem)
7231 %{
7232 match(Set dst (LoadP mem));
7233 predicate(n->as_Load()->barrier_data() == 0);
7234
7235 ins_cost(125); // XXX
7236 format %{ "movq $dst, $mem\t# ptr" %}
7237 ins_encode %{
7238 __ movq($dst$$Register, $mem$$Address);
7239 %}
7240 ins_pipe(ialu_reg_mem); // XXX
7241 %}
7242
7243 // Load Compressed Pointer
7244 instruct loadN(rRegN dst, memory mem)
7245 %{
7246 predicate(n->as_Load()->barrier_data() == 0);
7247 match(Set dst (LoadN mem));
7248
7249 ins_cost(125); // XXX
7250 format %{ "movl $dst, $mem\t# compressed ptr" %}
7251 ins_encode %{
7252 __ movl($dst$$Register, $mem$$Address);
7253 %}
7254 ins_pipe(ialu_reg_mem); // XXX
7255 %}
7256
7257
7258 // Load Klass Pointer
7259 instruct loadKlass(rRegP dst, memory mem)
7260 %{
7261 match(Set dst (LoadKlass mem));
7262
7263 ins_cost(125); // XXX
7264 format %{ "movq $dst, $mem\t# class" %}
7265 ins_encode %{
7266 __ movq($dst$$Register, $mem$$Address);
7267 %}
7268 ins_pipe(ialu_reg_mem); // XXX
7269 %}
7270
7271 // Load narrow Klass Pointer
7272 instruct loadNKlass(rRegN dst, memory mem)
7273 %{
7274 predicate(!UseCompactObjectHeaders);
7275 match(Set dst (LoadNKlass mem));
7276
7277 ins_cost(125); // XXX
7278 format %{ "movl $dst, $mem\t# compressed klass ptr" %}
7279 ins_encode %{
7280 __ movl($dst$$Register, $mem$$Address);
7281 %}
7282 ins_pipe(ialu_reg_mem); // XXX
7283 %}
7284
7285 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
7286 %{
7287 predicate(UseCompactObjectHeaders);
7288 match(Set dst (LoadNKlass mem));
7289 effect(KILL cr);
7290 ins_cost(125);
7291 format %{
7292 "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
7293 "shrl $dst, markWord::klass_shift"
7294 %}
7295 ins_encode %{
7296     // The incoming address points to obj-start + Type::klass_offset(). We need to extract
7297 // obj-start, so that we can load from the object's mark-word instead.
7298 Register d = $dst$$Register;
7299 Address s = ($mem$$Address).plus_disp(-Type::klass_offset());
7300 if (UseAPX) {
7301 __ eshrl(d, s, markWord::klass_shift, false);
7302 } else {
7303 __ movl(d, s);
7304 __ shrl(d, markWord::klass_shift);
7305 }
7306 %}
7307 ins_pipe(ialu_reg_mem);
7308 %}
7309
7310 // Load Float
7311 instruct loadF(regF dst, memory mem)
7312 %{
7313 match(Set dst (LoadF mem));
7314
7315 ins_cost(145); // XXX
7316 format %{ "movss $dst, $mem\t# float" %}
7317 ins_encode %{
7318 __ movflt($dst$$XMMRegister, $mem$$Address);
7319 %}
7320 ins_pipe(pipe_slow); // XXX
7321 %}
7322
7323 // Load Double
7324 instruct loadD_partial(regD dst, memory mem)
7325 %{
7326 predicate(!UseXmmLoadAndClearUpper);
7327 match(Set dst (LoadD mem));
7328
7329 ins_cost(145); // XXX
7330 format %{ "movlpd $dst, $mem\t# double" %}
7331 ins_encode %{
7332 __ movdbl($dst$$XMMRegister, $mem$$Address);
7333 %}
7334 ins_pipe(pipe_slow); // XXX
7335 %}
7336
7337 instruct loadD(regD dst, memory mem)
7338 %{
7339 predicate(UseXmmLoadAndClearUpper);
7340 match(Set dst (LoadD mem));
7341
7342 ins_cost(145); // XXX
7343 format %{ "movsd $dst, $mem\t# double" %}
7344 ins_encode %{
7345 __ movdbl($dst$$XMMRegister, $mem$$Address);
7346 %}
7347 ins_pipe(pipe_slow); // XXX
7348 %}
7349
7350 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
7351 %{
7352 match(Set dst con);
7353
7354 format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
7355
7356 ins_encode %{
7357 __ load_aotrc_address($dst$$Register, (address)$con$$constant);
7358 %}
7359
7360 ins_pipe(ialu_reg_fat);
7361 %}
7362
7363 // max = java.lang.Math.max(float a, float b)
7364 instruct maxF_reg_avx10_2(regF dst, regF a, regF b) %{
7365 predicate(VM_Version::supports_avx10_2());
7366 match(Set dst (MaxF a b));
7367 format %{ "maxF $dst, $a, $b" %}
7368 ins_encode %{
7369 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7370 %}
7371 ins_pipe( pipe_slow );
7372 %}
7373
7374 // max = java.lang.Math.max(float a, float b)
7375 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7376 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7377 match(Set dst (MaxF a b));
7378 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7379 format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7380 ins_encode %{
7381 __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7382 %}
7383 ins_pipe( pipe_slow );
7384 %}
7385
7386 instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7387 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7388 match(Set dst (MaxF a b));
7389 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7390
7391 format %{ "maxF_reduction $dst, $a, $b \t!using $xtmp and $rtmp as TEMP" %}
7392 ins_encode %{
7393 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7394 false /*min*/, true /*single*/);
7395 %}
7396 ins_pipe( pipe_slow );
7397 %}
7398
7399 // max = java.lang.Math.max(double a, double b)
7400 instruct maxD_reg_avx10_2(regD dst, regD a, regD b) %{
7401 predicate(VM_Version::supports_avx10_2());
7402 match(Set dst (MaxD a b));
7403 format %{ "maxD $dst, $a, $b" %}
7404 ins_encode %{
7405 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MAX_COMPARE_SIGN);
7406 %}
7407 ins_pipe( pipe_slow );
7408 %}
7409
7410 // max = java.lang.Math.max(double a, double b)
7411 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7412 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7413 match(Set dst (MaxD a b));
7414 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
7415 format %{ "maxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7416 ins_encode %{
7417 __ vminmax_fp(Op_MaxV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7418 %}
7419 ins_pipe( pipe_slow );
7420 %}
7421
7422 instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7423 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7424 match(Set dst (MaxD a b));
7425 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7426
7427 format %{ "maxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7428 ins_encode %{
7429 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7430 false /*min*/, false /*single*/);
7431 %}
7432 ins_pipe( pipe_slow );
7433 %}
7434
7435 // min = java.lang.Math.min(float a, float b)
7436 instruct minF_reg_avx10_2(regF dst, regF a, regF b) %{
7437 predicate(VM_Version::supports_avx10_2());
7438 match(Set dst (MinF a b));
7439 format %{ "minF $dst, $a, $b" %}
7440 ins_encode %{
7441 __ eminmaxss($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7442 %}
7443 ins_pipe( pipe_slow );
7444 %}
7445
7446 // min = java.lang.Math.min(float a, float b)
7447 instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
7448 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7449 match(Set dst (MinF a b));
7450 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7451 format %{ "minF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7452 ins_encode %{
7453 __ vminmax_fp(Op_MinV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7454 %}
7455 ins_pipe( pipe_slow );
7456 %}
7457
7458 instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
7459 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7460 match(Set dst (MinF a b));
7461 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7462
7463 format %{ "minF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7464 ins_encode %{
7465 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7466 true /*min*/, true /*single*/);
7467 %}
7468 ins_pipe( pipe_slow );
7469 %}
7470
7471 // min = java.lang.Math.min(double a, double b)
7472 instruct minD_reg_avx10_2(regD dst, regD a, regD b) %{
7473 predicate(VM_Version::supports_avx10_2());
7474 match(Set dst (MinD a b));
7475 format %{ "minD $dst, $a, $b" %}
7476 ins_encode %{
7477 __ eminmaxsd($dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, AVX10_2_MINMAX_MIN_COMPARE_SIGN);
7478 %}
7479 ins_pipe( pipe_slow );
7480 %}
7481
7482 // min = java.lang.Math.min(double a, double b)
7483 instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
7484 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
7485 match(Set dst (MinD a b));
7486 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
7487 format %{ "minD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
7488 ins_encode %{
7489 __ vminmax_fp(Op_MinV, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
7490 %}
7491 ins_pipe( pipe_slow );
7492 %}
7493
7494 instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
7495 predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
7496 match(Set dst (MinD a b));
7497 effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
7498
7499   format %{ "minD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
7500 ins_encode %{
7501 emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,
7502 true /*min*/, false /*single*/);
7503 %}
7504 ins_pipe( pipe_slow );
7505 %}
7506
7507 // Load Effective Address
7508 instruct leaP8(rRegP dst, indOffset8 mem)
7509 %{
7510 match(Set dst mem);
7511
7512 ins_cost(110); // XXX
7513 format %{ "leaq $dst, $mem\t# ptr 8" %}
7514 ins_encode %{
7515 __ leaq($dst$$Register, $mem$$Address);
7516 %}
7517 ins_pipe(ialu_reg_reg_fat);
7518 %}
7519
7520 instruct leaP32(rRegP dst, indOffset32 mem)
7521 %{
7522 match(Set dst mem);
7523
7524 ins_cost(110);
7525 format %{ "leaq $dst, $mem\t# ptr 32" %}
7526 ins_encode %{
7527 __ leaq($dst$$Register, $mem$$Address);
7528 %}
7529 ins_pipe(ialu_reg_reg_fat);
7530 %}
7531
7532 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
7533 %{
7534 match(Set dst mem);
7535
7536 ins_cost(110);
7537 format %{ "leaq $dst, $mem\t# ptr idxoff" %}
7538 ins_encode %{
7539 __ leaq($dst$$Register, $mem$$Address);
7540 %}
7541 ins_pipe(ialu_reg_reg_fat);
7542 %}
7543
7544 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
7545 %{
7546 match(Set dst mem);
7547
7548 ins_cost(110);
7549 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7550 ins_encode %{
7551 __ leaq($dst$$Register, $mem$$Address);
7552 %}
7553 ins_pipe(ialu_reg_reg_fat);
7554 %}
7555
7556 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
7557 %{
7558 match(Set dst mem);
7559
7560 ins_cost(110);
7561 format %{ "leaq $dst, $mem\t# ptr idxscale" %}
7562 ins_encode %{
7563 __ leaq($dst$$Register, $mem$$Address);
7564 %}
7565 ins_pipe(ialu_reg_reg_fat);
7566 %}
7567
7568 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
7569 %{
7570 match(Set dst mem);
7571
7572 ins_cost(110);
7573 format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
7574 ins_encode %{
7575 __ leaq($dst$$Register, $mem$$Address);
7576 %}
7577 ins_pipe(ialu_reg_reg_fat);
7578 %}
7579
7580 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
7581 %{
7582 match(Set dst mem);
7583
7584 ins_cost(110);
7585 format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
7586 ins_encode %{
7587 __ leaq($dst$$Register, $mem$$Address);
7588 %}
7589 ins_pipe(ialu_reg_reg_fat);
7590 %}
7591
7592 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
7593 %{
7594 match(Set dst mem);
7595
7596 ins_cost(110);
7597 format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
7598 ins_encode %{
7599 __ leaq($dst$$Register, $mem$$Address);
7600 %}
7601 ins_pipe(ialu_reg_reg_fat);
7602 %}
7603
7604 // Load Effective Address which uses Narrow (32-bits) oop
7605 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
7606 %{
7607 predicate(UseCompressedOops && (CompressedOops::shift() != 0));
7608 match(Set dst mem);
7609
7610 ins_cost(110);
7611 format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
7612 ins_encode %{
7613 __ leaq($dst$$Register, $mem$$Address);
7614 %}
7615 ins_pipe(ialu_reg_reg_fat);
7616 %}
7617
7618 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
7619 %{
7620 predicate(CompressedOops::shift() == 0);
7621 match(Set dst mem);
7622
7623 ins_cost(110); // XXX
7624 format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
7625 ins_encode %{
7626 __ leaq($dst$$Register, $mem$$Address);
7627 %}
7628 ins_pipe(ialu_reg_reg_fat);
7629 %}
7630
7631 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
7632 %{
7633 predicate(CompressedOops::shift() == 0);
7634 match(Set dst mem);
7635
7636 ins_cost(110);
7637 format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
7638 ins_encode %{
7639 __ leaq($dst$$Register, $mem$$Address);
7640 %}
7641 ins_pipe(ialu_reg_reg_fat);
7642 %}
7643
7644 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
7645 %{
7646 predicate(CompressedOops::shift() == 0);
7647 match(Set dst mem);
7648
7649 ins_cost(110);
7650 format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
7651 ins_encode %{
7652 __ leaq($dst$$Register, $mem$$Address);
7653 %}
7654 ins_pipe(ialu_reg_reg_fat);
7655 %}
7656
7657 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
7658 %{
7659 predicate(CompressedOops::shift() == 0);
7660 match(Set dst mem);
7661
7662 ins_cost(110);
7663 format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
7664 ins_encode %{
7665 __ leaq($dst$$Register, $mem$$Address);
7666 %}
7667 ins_pipe(ialu_reg_reg_fat);
7668 %}
7669
7670 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
7671 %{
7672 predicate(CompressedOops::shift() == 0);
7673 match(Set dst mem);
7674
7675 ins_cost(110);
7676 format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
7677 ins_encode %{
7678 __ leaq($dst$$Register, $mem$$Address);
7679 %}
7680 ins_pipe(ialu_reg_reg_fat);
7681 %}
7682
7683 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
7684 %{
7685 predicate(CompressedOops::shift() == 0);
7686 match(Set dst mem);
7687
7688 ins_cost(110);
7689 format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
7690 ins_encode %{
7691 __ leaq($dst$$Register, $mem$$Address);
7692 %}
7693 ins_pipe(ialu_reg_reg_fat);
7694 %}
7695
7696 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
7697 %{
7698 predicate(CompressedOops::shift() == 0);
7699 match(Set dst mem);
7700
7701 ins_cost(110);
7702 format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
7703 ins_encode %{
7704 __ leaq($dst$$Register, $mem$$Address);
7705 %}
7706 ins_pipe(ialu_reg_reg_fat);
7707 %}
7708
7709 instruct loadConI(rRegI dst, immI src)
7710 %{
7711 match(Set dst src);
7712
7713 format %{ "movl $dst, $src\t# int" %}
7714 ins_encode %{
7715 __ movl($dst$$Register, $src$$constant);
7716 %}
7717 ins_pipe(ialu_reg_fat); // XXX
7718 %}
7719
7720 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
7721 %{
7722 match(Set dst src);
7723 effect(KILL cr);
7724
7725 ins_cost(50);
7726 format %{ "xorl $dst, $dst\t# int" %}
7727 ins_encode %{
7728 __ xorl($dst$$Register, $dst$$Register);
7729 %}
7730 ins_pipe(ialu_reg);
7731 %}
7732
7733 instruct loadConL(rRegL dst, immL src)
7734 %{
7735 match(Set dst src);
7736
7737 ins_cost(150);
7738 format %{ "movq $dst, $src\t# long" %}
7739 ins_encode %{
7740 __ mov64($dst$$Register, $src$$constant);
7741 %}
7742 ins_pipe(ialu_reg);
7743 %}
7744
7745 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
7746 %{
7747 match(Set dst src);
7748 effect(KILL cr);
7749
7750 ins_cost(50);
7751 format %{ "xorl $dst, $dst\t# long" %}
7752 ins_encode %{
7753 __ xorl($dst$$Register, $dst$$Register);
7754 %}
7755 ins_pipe(ialu_reg); // XXX
7756 %}
7757
7758 instruct loadConUL32(rRegL dst, immUL32 src)
7759 %{
7760 match(Set dst src);
7761
7762 ins_cost(60);
7763 format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
7764 ins_encode %{
7765 __ movl($dst$$Register, $src$$constant);
7766 %}
7767 ins_pipe(ialu_reg);
7768 %}
7769
7770 instruct loadConL32(rRegL dst, immL32 src)
7771 %{
7772 match(Set dst src);
7773
7774 ins_cost(70);
7775 format %{ "movq $dst, $src\t# long (32-bit)" %}
7776 ins_encode %{
7777 __ movq($dst$$Register, $src$$constant);
7778 %}
7779 ins_pipe(ialu_reg);
7780 %}
7781
7782 instruct loadConP(rRegP dst, immP con) %{
7783 match(Set dst con);
7784
7785 format %{ "movq $dst, $con\t# ptr" %}
7786 ins_encode %{
7787 __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
7788 %}
7789 ins_pipe(ialu_reg_fat); // XXX
7790 %}
7791
7792 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
7793 %{
7794 match(Set dst src);
7795 effect(KILL cr);
7796
7797 ins_cost(50);
7798 format %{ "xorl $dst, $dst\t# ptr" %}
7799 ins_encode %{
7800 __ xorl($dst$$Register, $dst$$Register);
7801 %}
7802 ins_pipe(ialu_reg);
7803 %}
7804
7805 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
7806 %{
7807 match(Set dst src);
7808 effect(KILL cr);
7809
7810 ins_cost(60);
7811 format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
7812 ins_encode %{
7813 __ movl($dst$$Register, $src$$constant);
7814 %}
7815 ins_pipe(ialu_reg);
7816 %}
7817
7818 instruct loadConF(regF dst, immF con) %{
7819 match(Set dst con);
7820 ins_cost(125);
7821 format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
7822 ins_encode %{
7823 __ movflt($dst$$XMMRegister, $constantaddress($con));
7824 %}
7825 ins_pipe(pipe_slow);
7826 %}
7827
7828 instruct loadConH(regF dst, immH con) %{
7829 match(Set dst con);
7830 ins_cost(125);
7831 format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
7832 ins_encode %{
7833 __ movflt($dst$$XMMRegister, $constantaddress($con));
7834 %}
7835 ins_pipe(pipe_slow);
7836 %}
7837
7838 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
7839 match(Set dst src);
7840 effect(KILL cr);
7841 format %{ "xorq $dst, $src\t# compressed null pointer" %}
7842 ins_encode %{
7843 __ xorq($dst$$Register, $dst$$Register);
7844 %}
7845 ins_pipe(ialu_reg);
7846 %}
7847
7848 instruct loadConN(rRegN dst, immN src) %{
7849 match(Set dst src);
7850
7851 ins_cost(125);
7852 format %{ "movl $dst, $src\t# compressed ptr" %}
7853 ins_encode %{
7854 address con = (address)$src$$constant;
7855 if (con == nullptr) {
7856 ShouldNotReachHere();
7857 } else {
7858 __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
7859 }
7860 %}
7861 ins_pipe(ialu_reg_fat); // XXX
7862 %}
7863
7864 instruct loadConNKlass(rRegN dst, immNKlass src) %{
7865 match(Set dst src);
7866
7867 ins_cost(125);
7868 format %{ "movl $dst, $src\t# compressed klass ptr" %}
7869 ins_encode %{
7870 address con = (address)$src$$constant;
7871 if (con == nullptr) {
7872 ShouldNotReachHere();
7873 } else {
7874 __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
7875 }
7876 %}
7877 ins_pipe(ialu_reg_fat); // XXX
7878 %}
7879
7880 instruct loadConF0(regF dst, immF0 src)
7881 %{
7882 match(Set dst src);
7883 ins_cost(100);
7884
7885 format %{ "xorps $dst, $dst\t# float 0.0" %}
7886 ins_encode %{
7887 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
7888 %}
7889 ins_pipe(pipe_slow);
7890 %}
7891
7892 // Use the same format since predicate() can not be used here.
7893 instruct loadConD(regD dst, immD con) %{
7894 match(Set dst con);
7895 ins_cost(125);
7896 format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
7897 ins_encode %{
7898 __ movdbl($dst$$XMMRegister, $constantaddress($con));
7899 %}
7900 ins_pipe(pipe_slow);
7901 %}
7902
7903 instruct loadConD0(regD dst, immD0 src)
7904 %{
7905 match(Set dst src);
7906 ins_cost(100);
7907
7908 format %{ "xorpd $dst, $dst\t# double 0.0" %}
7909 ins_encode %{
7910 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
7911 %}
7912 ins_pipe(pipe_slow);
7913 %}
7914
7915 instruct loadSSI(rRegI dst, stackSlotI src)
7916 %{
7917 match(Set dst src);
7918
7919 ins_cost(125);
7920 format %{ "movl $dst, $src\t# int stk" %}
7921 ins_encode %{
7922 __ movl($dst$$Register, $src$$Address);
7923 %}
7924 ins_pipe(ialu_reg_mem);
7925 %}
7926
7927 instruct loadSSL(rRegL dst, stackSlotL src)
7928 %{
7929 match(Set dst src);
7930
7931 ins_cost(125);
7932 format %{ "movq $dst, $src\t# long stk" %}
7933 ins_encode %{
7934 __ movq($dst$$Register, $src$$Address);
7935 %}
7936 ins_pipe(ialu_reg_mem);
7937 %}
7938
7939 instruct loadSSP(rRegP dst, stackSlotP src)
7940 %{
7941 match(Set dst src);
7942
7943 ins_cost(125);
7944 format %{ "movq $dst, $src\t# ptr stk" %}
7945 ins_encode %{
7946 __ movq($dst$$Register, $src$$Address);
7947 %}
7948 ins_pipe(ialu_reg_mem);
7949 %}
7950
7951 instruct loadSSF(regF dst, stackSlotF src)
7952 %{
7953 match(Set dst src);
7954
7955 ins_cost(125);
7956 format %{ "movss $dst, $src\t# float stk" %}
7957 ins_encode %{
7958 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
7959 %}
7960 ins_pipe(pipe_slow); // XXX
7961 %}
7962
7963 // Use the same format since predicate() can not be used here.
7964 instruct loadSSD(regD dst, stackSlotD src)
7965 %{
7966 match(Set dst src);
7967
7968 ins_cost(125);
7969 format %{ "movsd $dst, $src\t# double stk" %}
7970 ins_encode %{
7971 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
7972 %}
7973 ins_pipe(pipe_slow); // XXX
7974 %}
7975
7976 // Prefetch instructions for allocation.
7977 // Must be safe to execute with invalid address (cannot fault).
7978
7979 instruct prefetchAlloc( memory mem ) %{
7980 predicate(AllocatePrefetchInstr==3);
7981 match(PrefetchAllocation mem);
7982 ins_cost(125);
7983
7984 format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
7985 ins_encode %{
7986 __ prefetchw($mem$$Address);
7987 %}
7988 ins_pipe(ialu_mem);
7989 %}
7990
7991 instruct prefetchAllocNTA( memory mem ) %{
7992 predicate(AllocatePrefetchInstr==0);
7993 match(PrefetchAllocation mem);
7994 ins_cost(125);
7995
7996 format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
7997 ins_encode %{
7998 __ prefetchnta($mem$$Address);
7999 %}
8000 ins_pipe(ialu_mem);
8001 %}
8002
8003 instruct prefetchAllocT0( memory mem ) %{
8004 predicate(AllocatePrefetchInstr==1);
8005 match(PrefetchAllocation mem);
8006 ins_cost(125);
8007
8008 format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
8009 ins_encode %{
8010 __ prefetcht0($mem$$Address);
8011 %}
8012 ins_pipe(ialu_mem);
8013 %}
8014
8015 instruct prefetchAllocT2( memory mem ) %{
8016 predicate(AllocatePrefetchInstr==2);
8017 match(PrefetchAllocation mem);
8018 ins_cost(125);
8019
8020 format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
8021 ins_encode %{
8022 __ prefetcht2($mem$$Address);
8023 %}
8024 ins_pipe(ialu_mem);
8025 %}
8026
8027 //----------Store Instructions-------------------------------------------------
8028
8029 // Store Byte
8030 instruct storeB(memory mem, rRegI src)
8031 %{
8032 match(Set mem (StoreB mem src));
8033
8034 ins_cost(125); // XXX
8035 format %{ "movb $mem, $src\t# byte" %}
8036 ins_encode %{
8037 __ movb($mem$$Address, $src$$Register);
8038 %}
8039 ins_pipe(ialu_mem_reg);
8040 %}
8041
8042 // Store Char/Short
8043 instruct storeC(memory mem, rRegI src)
8044 %{
8045 match(Set mem (StoreC mem src));
8046
8047 ins_cost(125); // XXX
8048 format %{ "movw $mem, $src\t# char/short" %}
8049 ins_encode %{
8050 __ movw($mem$$Address, $src$$Register);
8051 %}
8052 ins_pipe(ialu_mem_reg);
8053 %}
8054
8055 // Store Integer
8056 instruct storeI(memory mem, rRegI src)
8057 %{
8058 match(Set mem (StoreI mem src));
8059
8060 ins_cost(125); // XXX
8061 format %{ "movl $mem, $src\t# int" %}
8062 ins_encode %{
8063 __ movl($mem$$Address, $src$$Register);
8064 %}
8065 ins_pipe(ialu_mem_reg);
8066 %}
8067
8068 // Store Long
8069 instruct storeL(memory mem, rRegL src)
8070 %{
8071 match(Set mem (StoreL mem src));
8072
8073 ins_cost(125); // XXX
8074 format %{ "movq $mem, $src\t# long" %}
8075 ins_encode %{
8076 __ movq($mem$$Address, $src$$Register);
8077 %}
8078 ins_pipe(ialu_mem_reg); // XXX
8079 %}
8080
8081 // Store Pointer
8082 instruct storeP(memory mem, any_RegP src)
8083 %{
8084 predicate(n->as_Store()->barrier_data() == 0);
8085 match(Set mem (StoreP mem src));
8086
8087 ins_cost(125); // XXX
8088 format %{ "movq $mem, $src\t# ptr" %}
8089 ins_encode %{
8090 __ movq($mem$$Address, $src$$Register);
8091 %}
8092 ins_pipe(ialu_mem_reg);
8093 %}
8094
8095 instruct storeImmP0(memory mem, immP0 zero)
8096 %{
8097 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
8098 match(Set mem (StoreP mem zero));
8099
8100 ins_cost(125); // XXX
8101 format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
8102 ins_encode %{
8103 __ movq($mem$$Address, r12);
8104 %}
8105 ins_pipe(ialu_mem_reg);
8106 %}
8107
8108 // Store Null Pointer, mark word, or other simple pointer constant.
8109 instruct storeImmP(memory mem, immP31 src)
8110 %{
8111 predicate(n->as_Store()->barrier_data() == 0);
8112 match(Set mem (StoreP mem src));
8113
8114 ins_cost(150); // XXX
8115 format %{ "movq $mem, $src\t# ptr" %}
8116 ins_encode %{
8117 __ movq($mem$$Address, $src$$constant);
8118 %}
8119 ins_pipe(ialu_mem_imm);
8120 %}
8121
8122 // Store Compressed Pointer
8123 instruct storeN(memory mem, rRegN src)
8124 %{
8125 predicate(n->as_Store()->barrier_data() == 0);
8126 match(Set mem (StoreN mem src));
8127
8128 ins_cost(125); // XXX
8129 format %{ "movl $mem, $src\t# compressed ptr" %}
8130 ins_encode %{
8131 __ movl($mem$$Address, $src$$Register);
8132 %}
8133 ins_pipe(ialu_mem_reg);
8134 %}
8135
8136 instruct storeNKlass(memory mem, rRegN src)
8137 %{
8138 match(Set mem (StoreNKlass mem src));
8139
8140 ins_cost(125); // XXX
8141 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8142 ins_encode %{
8143 __ movl($mem$$Address, $src$$Register);
8144 %}
8145 ins_pipe(ialu_mem_reg);
8146 %}
8147
8148 instruct storeImmN0(memory mem, immN0 zero)
8149 %{
8150 predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
8151 match(Set mem (StoreN mem zero));
8152
8153 ins_cost(125); // XXX
8154 format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
8155 ins_encode %{
8156 __ movl($mem$$Address, r12);
8157 %}
8158 ins_pipe(ialu_mem_reg);
8159 %}
8160
8161 instruct storeImmN(memory mem, immN src)
8162 %{
8163 predicate(n->as_Store()->barrier_data() == 0);
8164 match(Set mem (StoreN mem src));
8165
8166 ins_cost(150); // XXX
8167 format %{ "movl $mem, $src\t# compressed ptr" %}
8168 ins_encode %{
8169 address con = (address)$src$$constant;
8170 if (con == nullptr) {
8171 __ movl($mem$$Address, 0);
8172 } else {
8173 __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
8174 }
8175 %}
8176 ins_pipe(ialu_mem_imm);
8177 %}
8178
8179 instruct storeImmNKlass(memory mem, immNKlass src)
8180 %{
8181 match(Set mem (StoreNKlass mem src));
8182
8183 ins_cost(150); // XXX
8184 format %{ "movl $mem, $src\t# compressed klass ptr" %}
8185 ins_encode %{
8186 __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
8187 %}
8188 ins_pipe(ialu_mem_imm);
8189 %}
8190
8191 // Store Integer Immediate
8192 instruct storeImmI0(memory mem, immI_0 zero)
8193 %{
8194 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8195 match(Set mem (StoreI mem zero));
8196
8197 ins_cost(125); // XXX
8198 format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
8199 ins_encode %{
8200 __ movl($mem$$Address, r12);
8201 %}
8202 ins_pipe(ialu_mem_reg);
8203 %}
8204
8205 instruct storeImmI(memory mem, immI src)
8206 %{
8207 match(Set mem (StoreI mem src));
8208
8209 ins_cost(150);
8210 format %{ "movl $mem, $src\t# int" %}
8211 ins_encode %{
8212 __ movl($mem$$Address, $src$$constant);
8213 %}
8214 ins_pipe(ialu_mem_imm);
8215 %}
8216
8217 // Store Long Immediate
8218 instruct storeImmL0(memory mem, immL0 zero)
8219 %{
8220 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8221 match(Set mem (StoreL mem zero));
8222
8223 ins_cost(125); // XXX
8224 format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
8225 ins_encode %{
8226 __ movq($mem$$Address, r12);
8227 %}
8228 ins_pipe(ialu_mem_reg);
8229 %}
8230
8231 instruct storeImmL(memory mem, immL32 src)
8232 %{
8233 match(Set mem (StoreL mem src));
8234
8235 ins_cost(150);
8236 format %{ "movq $mem, $src\t# long" %}
8237 ins_encode %{
8238 __ movq($mem$$Address, $src$$constant);
8239 %}
8240 ins_pipe(ialu_mem_imm);
8241 %}
8242
8243 // Store Short/Char Immediate
8244 instruct storeImmC0(memory mem, immI_0 zero)
8245 %{
8246 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8247 match(Set mem (StoreC mem zero));
8248
8249 ins_cost(125); // XXX
8250 format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
8251 ins_encode %{
8252 __ movw($mem$$Address, r12);
8253 %}
8254 ins_pipe(ialu_mem_reg);
8255 %}
8256
8257 instruct storeImmI16(memory mem, immI16 src)
8258 %{
8259 predicate(UseStoreImmI16);
8260 match(Set mem (StoreC mem src));
8261
8262 ins_cost(150);
8263 format %{ "movw $mem, $src\t# short/char" %}
8264 ins_encode %{
8265 __ movw($mem$$Address, $src$$constant);
8266 %}
8267 ins_pipe(ialu_mem_imm);
8268 %}
8269
8270 // Store Byte Immediate
8271 instruct storeImmB0(memory mem, immI_0 zero)
8272 %{
8273 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8274 match(Set mem (StoreB mem zero));
8275
8276 ins_cost(125); // XXX
8277   format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
8278 ins_encode %{
8279 __ movb($mem$$Address, r12);
8280 %}
8281 ins_pipe(ialu_mem_reg);
8282 %}
8283
8284 instruct storeImmB(memory mem, immI8 src)
8285 %{
8286 match(Set mem (StoreB mem src));
8287
8288 ins_cost(150); // XXX
8289 format %{ "movb $mem, $src\t# byte" %}
8290 ins_encode %{
8291 __ movb($mem$$Address, $src$$constant);
8292 %}
8293 ins_pipe(ialu_mem_imm);
8294 %}
8295
8296 // Store Float
8297 instruct storeF(memory mem, regF src)
8298 %{
8299 match(Set mem (StoreF mem src));
8300
8301 ins_cost(95); // XXX
8302 format %{ "movss $mem, $src\t# float" %}
8303 ins_encode %{
8304 __ movflt($mem$$Address, $src$$XMMRegister);
8305 %}
8306 ins_pipe(pipe_slow); // XXX
8307 %}
8308
8309 // Store immediate Float value (it is faster than store from XMM register)
8310 instruct storeF0(memory mem, immF0 zero)
8311 %{
8312 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8313 match(Set mem (StoreF mem zero));
8314
8315 ins_cost(25); // XXX
8316 format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
8317 ins_encode %{
8318 __ movl($mem$$Address, r12);
8319 %}
8320 ins_pipe(ialu_mem_reg);
8321 %}
8322
8323 instruct storeF_imm(memory mem, immF src)
8324 %{
8325 match(Set mem (StoreF mem src));
8326
8327 ins_cost(50);
8328 format %{ "movl $mem, $src\t# float" %}
8329 ins_encode %{
8330 __ movl($mem$$Address, jint_cast($src$$constant));
8331 %}
8332 ins_pipe(ialu_mem_imm);
8333 %}
8334
8335 // Store Double
8336 instruct storeD(memory mem, regD src)
8337 %{
8338 match(Set mem (StoreD mem src));
8339
8340 ins_cost(95); // XXX
8341 format %{ "movsd $mem, $src\t# double" %}
8342 ins_encode %{
8343 __ movdbl($mem$$Address, $src$$XMMRegister);
8344 %}
8345 ins_pipe(pipe_slow); // XXX
8346 %}
8347
8348 // Store immediate double 0.0 (it is faster than store from XMM register)
8349 instruct storeD0_imm(memory mem, immD0 src)
8350 %{
8351 predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
8352 match(Set mem (StoreD mem src));
8353
8354 ins_cost(50);
8355 format %{ "movq $mem, $src\t# double 0." %}
8356 ins_encode %{
8357 __ movq($mem$$Address, $src$$constant);
8358 %}
8359 ins_pipe(ialu_mem_imm);
8360 %}
8361
8362 instruct storeD0(memory mem, immD0 zero)
8363 %{
8364 predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
8365 match(Set mem (StoreD mem zero));
8366
8367 ins_cost(25); // XXX
8368 format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
8369 ins_encode %{
8370 __ movq($mem$$Address, r12);
8371 %}
8372 ins_pipe(ialu_mem_reg);
8373 %}
8374
8375 instruct storeSSI(stackSlotI dst, rRegI src)
8376 %{
8377 match(Set dst src);
8378
8379 ins_cost(100);
8380 format %{ "movl $dst, $src\t# int stk" %}
8381 ins_encode %{
8382 __ movl($dst$$Address, $src$$Register);
8383 %}
8384 ins_pipe( ialu_mem_reg );
8385 %}
8386
8387 instruct storeSSL(stackSlotL dst, rRegL src)
8388 %{
8389 match(Set dst src);
8390
8391 ins_cost(100);
8392 format %{ "movq $dst, $src\t# long stk" %}
8393 ins_encode %{
8394 __ movq($dst$$Address, $src$$Register);
8395 %}
8396 ins_pipe(ialu_mem_reg);
8397 %}
8398
8399 instruct storeSSP(stackSlotP dst, rRegP src)
8400 %{
8401 match(Set dst src);
8402
8403 ins_cost(100);
8404 format %{ "movq $dst, $src\t# ptr stk" %}
8405 ins_encode %{
8406 __ movq($dst$$Address, $src$$Register);
8407 %}
8408 ins_pipe(ialu_mem_reg);
8409 %}
8410
8411 instruct storeSSF(stackSlotF dst, regF src)
8412 %{
8413 match(Set dst src);
8414
8415 ins_cost(95); // XXX
8416 format %{ "movss $dst, $src\t# float stk" %}
8417 ins_encode %{
8418 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
8419 %}
8420 ins_pipe(pipe_slow); // XXX
8421 %}
8422
8423 instruct storeSSD(stackSlotD dst, regD src)
8424 %{
8425 match(Set dst src);
8426
8427 ins_cost(95); // XXX
8428 format %{ "movsd $dst, $src\t# double stk" %}
8429 ins_encode %{
8430 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
8431 %}
8432 ins_pipe(pipe_slow); // XXX
8433 %}
8434
8435 instruct cacheWB(indirect addr)
8436 %{
8437 predicate(VM_Version::supports_data_cache_line_flush());
8438 match(CacheWB addr);
8439
8440 ins_cost(100);
8441 format %{"cache wb $addr" %}
8442 ins_encode %{
8443 assert($addr->index_position() < 0, "should be");
8444 assert($addr$$disp == 0, "should be");
8445 __ cache_wb(Address($addr$$base$$Register, 0));
8446 %}
8447 ins_pipe(pipe_slow); // XXX
8448 %}
8449
8450 instruct cacheWBPreSync()
8451 %{
8452 predicate(VM_Version::supports_data_cache_line_flush());
8453 match(CacheWBPreSync);
8454
8455 ins_cost(100);
8456 format %{"cache wb presync" %}
8457 ins_encode %{
8458 __ cache_wbsync(true);
8459 %}
8460 ins_pipe(pipe_slow); // XXX
8461 %}
8462
8463 instruct cacheWBPostSync()
8464 %{
8465 predicate(VM_Version::supports_data_cache_line_flush());
8466 match(CacheWBPostSync);
8467
8468 ins_cost(100);
8469 format %{"cache wb postsync" %}
8470 ins_encode %{
8471 __ cache_wbsync(false);
8472 %}
8473 ins_pipe(pipe_slow); // XXX
8474 %}
8475
8476 //----------BSWAP Instructions-------------------------------------------------
8477 instruct bytes_reverse_int(rRegI dst) %{
8478 match(Set dst (ReverseBytesI dst));
8479
8480 format %{ "bswapl $dst" %}
8481 ins_encode %{
8482 __ bswapl($dst$$Register);
8483 %}
8484 ins_pipe( ialu_reg );
8485 %}
8486
8487 instruct bytes_reverse_long(rRegL dst) %{
8488 match(Set dst (ReverseBytesL dst));
8489
8490 format %{ "bswapq $dst" %}
8491 ins_encode %{
8492 __ bswapq($dst$$Register);
8493 %}
8494 ins_pipe( ialu_reg);
8495 %}
8496
8497 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
8498 match(Set dst (ReverseBytesUS dst));
8499 effect(KILL cr);
8500
8501 format %{ "bswapl $dst\n\t"
8502 "shrl $dst,16\n\t" %}
8503 ins_encode %{
8504 __ bswapl($dst$$Register);
8505 __ shrl($dst$$Register, 16);
8506 %}
8507 ins_pipe( ialu_reg );
8508 %}
8509
8510 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
8511 match(Set dst (ReverseBytesS dst));
8512 effect(KILL cr);
8513
8514 format %{ "bswapl $dst\n\t"
8515 "sar $dst,16\n\t" %}
8516 ins_encode %{
8517 __ bswapl($dst$$Register);
8518 __ sarl($dst$$Register, 16);
8519 %}
8520 ins_pipe( ialu_reg );
8521 %}
8522
8523 //---------- Zeros Count Instructions ------------------------------------------
8524
8525 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8526 predicate(UseCountLeadingZerosInstruction);
8527 match(Set dst (CountLeadingZerosI src));
8528 effect(KILL cr);
8529
8530 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8531 ins_encode %{
8532 __ lzcntl($dst$$Register, $src$$Register);
8533 %}
8534 ins_pipe(ialu_reg);
8535 %}
8536
8537 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8538 predicate(UseCountLeadingZerosInstruction);
8539 match(Set dst (CountLeadingZerosI (LoadI src)));
8540 effect(KILL cr);
8541 ins_cost(175);
8542 format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
8543 ins_encode %{
8544 __ lzcntl($dst$$Register, $src$$Address);
8545 %}
8546 ins_pipe(ialu_reg_mem);
8547 %}
8548
8549 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
8550 predicate(!UseCountLeadingZerosInstruction);
8551 match(Set dst (CountLeadingZerosI src));
8552 effect(KILL cr);
8553
8554 format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
8555 "jnz skip\n\t"
8556 "movl $dst, -1\n"
8557 "skip:\n\t"
8558 "negl $dst\n\t"
8559 "addl $dst, 31" %}
8560 ins_encode %{
8561 Register Rdst = $dst$$Register;
8562 Register Rsrc = $src$$Register;
8563 Label skip;
8564 __ bsrl(Rdst, Rsrc);
8565 __ jccb(Assembler::notZero, skip);
8566 __ movl(Rdst, -1);
8567 __ bind(skip);
8568 __ negl(Rdst);
8569 __ addl(Rdst, BitsPerInt - 1);
8570 %}
8571 ins_pipe(ialu_reg);
8572 %}
8573
8574 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8575 predicate(UseCountLeadingZerosInstruction);
8576 match(Set dst (CountLeadingZerosL src));
8577 effect(KILL cr);
8578
8579 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8580 ins_encode %{
8581 __ lzcntq($dst$$Register, $src$$Register);
8582 %}
8583 ins_pipe(ialu_reg);
8584 %}
8585
8586 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8587 predicate(UseCountLeadingZerosInstruction);
8588 match(Set dst (CountLeadingZerosL (LoadL src)));
8589 effect(KILL cr);
8590 ins_cost(175);
8591 format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
8592 ins_encode %{
8593 __ lzcntq($dst$$Register, $src$$Address);
8594 %}
8595 ins_pipe(ialu_reg_mem);
8596 %}
8597
8598 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
8599 predicate(!UseCountLeadingZerosInstruction);
8600 match(Set dst (CountLeadingZerosL src));
8601 effect(KILL cr);
8602
8603 format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
8604 "jnz skip\n\t"
8605 "movl $dst, -1\n"
8606 "skip:\n\t"
8607 "negl $dst\n\t"
8608 "addl $dst, 63" %}
8609 ins_encode %{
8610 Register Rdst = $dst$$Register;
8611 Register Rsrc = $src$$Register;
8612 Label skip;
8613 __ bsrq(Rdst, Rsrc);
8614 __ jccb(Assembler::notZero, skip);
8615 __ movl(Rdst, -1);
8616 __ bind(skip);
8617 __ negl(Rdst);
8618 __ addl(Rdst, BitsPerLong - 1);
8619 %}
8620 ins_pipe(ialu_reg);
8621 %}
8622
8623 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
8624 predicate(UseCountTrailingZerosInstruction);
8625 match(Set dst (CountTrailingZerosI src));
8626 effect(KILL cr);
8627
8628 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8629 ins_encode %{
8630 __ tzcntl($dst$$Register, $src$$Register);
8631 %}
8632 ins_pipe(ialu_reg);
8633 %}
8634
8635 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
8636 predicate(UseCountTrailingZerosInstruction);
8637 match(Set dst (CountTrailingZerosI (LoadI src)));
8638 effect(KILL cr);
8639 ins_cost(175);
8640 format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
8641 ins_encode %{
8642 __ tzcntl($dst$$Register, $src$$Address);
8643 %}
8644 ins_pipe(ialu_reg_mem);
8645 %}
8646
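// Without tzcnt, count-trailing-zeros is emulated with bsf, which already
// returns the index of the lowest set bit (i.e. the trailing-zero count).
// Only a zero source needs fixing up: bsf then sets ZF and leaves the
// destination undefined, so the result is forced to 32 (or 64 for longs).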
8647 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
8648 predicate(!UseCountTrailingZerosInstruction);
8649 match(Set dst (CountTrailingZerosI src));
8650 effect(KILL cr);
8651
8652 format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
8653 "jnz done\n\t"
8654 "movl $dst, 32\n"
8655 "done:" %}
8656 ins_encode %{
8657 Register Rdst = $dst$$Register;
8658 Label done;
8659 __ bsfl(Rdst, $src$$Register);
8660 __ jccb(Assembler::notZero, done);
8661 __ movl(Rdst, BitsPerInt);
8662 __ bind(done);
8663 %}
8664 ins_pipe(ialu_reg);
8665 %}
8666
8667 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
8668 predicate(UseCountTrailingZerosInstruction);
8669 match(Set dst (CountTrailingZerosL src));
8670 effect(KILL cr);
8671
8672 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8673 ins_encode %{
8674 __ tzcntq($dst$$Register, $src$$Register);
8675 %}
8676 ins_pipe(ialu_reg);
8677 %}
8678
8679 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
8680 predicate(UseCountTrailingZerosInstruction);
8681 match(Set dst (CountTrailingZerosL (LoadL src)));
8682 effect(KILL cr);
8683 ins_cost(175);
8684 format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
8685 ins_encode %{
8686 __ tzcntq($dst$$Register, $src$$Address);
8687 %}
8688 ins_pipe(ialu_reg_mem);
8689 %}
8690
8691 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
8692 predicate(!UseCountTrailingZerosInstruction);
8693 match(Set dst (CountTrailingZerosL src));
8694 effect(KILL cr);
8695
8696 format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
8697 "jnz done\n\t"
8698 "movl $dst, 64\n"
8699 "done:" %}
8700 ins_encode %{
8701 Register Rdst = $dst$$Register;
8702 Label done;
8703 __ bsfq(Rdst, $src$$Register);
8704 __ jccb(Assembler::notZero, done);
8705 __ movl(Rdst, BitsPerLong);
8706 __ bind(done);
8707 %}
8708 ins_pipe(ialu_reg);
8709 %}
8710
8711 //--------------- Reverse Operation Instructions ----------------
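// Scalar bit reversal is delegated to MacroAssembler::reverseI/reverseL. The
// baseline forms pass only GPR temporaries; the *_gfni forms also pass XMM
// temporaries, presumably so that the helper can use a GFNI
// (gf2p8affineqb-based) byte-wise bit reversal instead of the shift-and-mask
// sequence.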
8712 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
8713 predicate(!VM_Version::supports_gfni());
8714 match(Set dst (ReverseI src));
8715 effect(TEMP dst, TEMP rtmp, KILL cr);
8716 format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
8717 ins_encode %{
8718 __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
8719 %}
8720 ins_pipe( ialu_reg );
8721 %}
8722
8723 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
8724 predicate(VM_Version::supports_gfni());
8725 match(Set dst (ReverseI src));
8726 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8727 format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8728 ins_encode %{
8729 __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
8730 %}
8731 ins_pipe( ialu_reg );
8732 %}
8733
8734 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
8735 predicate(!VM_Version::supports_gfni());
8736 match(Set dst (ReverseL src));
8737 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
8738 format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
8739 ins_encode %{
8740 __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
8741 %}
8742 ins_pipe( ialu_reg );
8743 %}
8744
8745 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
8746 predicate(VM_Version::supports_gfni());
8747 match(Set dst (ReverseL src));
8748 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
8749 format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
8750 ins_encode %{
8751 __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
8752 %}
8753 ins_pipe( ialu_reg );
8754 %}
8755
8756 //---------- Population Count Instructions -------------------------------------
8757
8758 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
8759 predicate(UsePopCountInstruction);
8760 match(Set dst (PopCountI src));
8761 effect(KILL cr);
8762
8763 format %{ "popcnt $dst, $src" %}
8764 ins_encode %{
8765 __ popcntl($dst$$Register, $src$$Register);
8766 %}
8767 ins_pipe(ialu_reg);
8768 %}
8769
8770 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8771 predicate(UsePopCountInstruction);
8772 match(Set dst (PopCountI (LoadI mem)));
8773 effect(KILL cr);
8774
8775 format %{ "popcnt $dst, $mem" %}
8776 ins_encode %{
8777 __ popcntl($dst$$Register, $mem$$Address);
8778 %}
8779 ins_pipe(ialu_reg);
8780 %}
8781
8782 // Note: Long.bitCount(long) returns an int.
8783 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
8784 predicate(UsePopCountInstruction);
8785 match(Set dst (PopCountL src));
8786 effect(KILL cr);
8787
8788 format %{ "popcnt $dst, $src" %}
8789 ins_encode %{
8790 __ popcntq($dst$$Register, $src$$Register);
8791 %}
8792 ins_pipe(ialu_reg);
8793 %}
8794
8795 // Note: Long.bitCount(long) returns an int.
8796 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
8797 predicate(UsePopCountInstruction);
8798 match(Set dst (PopCountL (LoadL mem)));
8799 effect(KILL cr);
8800
8801 format %{ "popcnt $dst, $mem" %}
8802 ins_encode %{
8803 __ popcntq($dst$$Register, $mem$$Address);
8804 %}
8805 ins_pipe(ialu_reg);
8806 %}
8807
8808
8809 //----------MemBar Instructions-----------------------------------------------
8810 // Memory barrier flavors
8811
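// x86 is a strongly ordered (TSO-like) architecture: acquire and release
// barriers need no code and match the empty encodings below. Only StoreLoad
// ordering requires a real fence, emitted as a locked add of zero to the top
// of the stack, which is typically cheaper than mfence.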
8812 instruct membar_acquire()
8813 %{
8814 match(MemBarAcquire);
8815 match(LoadFence);
8816 ins_cost(0);
8817
8818 size(0);
8819 format %{ "MEMBAR-acquire ! (empty encoding)" %}
8820 ins_encode();
8821 ins_pipe(empty);
8822 %}
8823
8824 instruct membar_acquire_lock()
8825 %{
8826 match(MemBarAcquireLock);
8827 ins_cost(0);
8828
8829 size(0);
8830 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
8831 ins_encode();
8832 ins_pipe(empty);
8833 %}
8834
8835 instruct membar_release()
8836 %{
8837 match(MemBarRelease);
8838 match(StoreFence);
8839 ins_cost(0);
8840
8841 size(0);
8842 format %{ "MEMBAR-release ! (empty encoding)" %}
8843 ins_encode();
8844 ins_pipe(empty);
8845 %}
8846
8847 instruct membar_release_lock()
8848 %{
8849 match(MemBarReleaseLock);
8850 ins_cost(0);
8851
8852 size(0);
8853 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
8854 ins_encode();
8855 ins_pipe(empty);
8856 %}
8857
8858 instruct membar_storeload(rFlagsReg cr) %{
8859 match(MemBarStoreLoad);
8860 effect(KILL cr);
8861 ins_cost(400);
8862
8863 format %{
8864 $$template
8865 $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
8866 %}
8867 ins_encode %{
8868 __ membar(Assembler::StoreLoad);
8869 %}
8870 ins_pipe(pipe_slow);
8871 %}
8872
8873 instruct membar_volatile(rFlagsReg cr) %{
8874 match(MemBarVolatile);
8875 effect(KILL cr);
8876 ins_cost(400);
8877
8878 format %{
8879 $$template
8880 $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
8881 %}
8882 ins_encode %{
8883 __ membar(Assembler::StoreLoad);
8884 %}
8885 ins_pipe(pipe_slow);
8886 %}
8887
8888 instruct unnecessary_membar_volatile()
8889 %{
8890 match(MemBarVolatile);
8891 predicate(Matcher::post_store_load_barrier(n));
8892 ins_cost(0);
8893
8894 size(0);
8895 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
8896 ins_encode();
8897 ins_pipe(empty);
8898 %}
8899
8900 instruct membar_full(rFlagsReg cr) %{
8901 match(MemBarFull);
8902 effect(KILL cr);
8903 ins_cost(400);
8904
8905 format %{
8906 $$template
8907 $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
8908 %}
8909 ins_encode %{
8910 __ membar(Assembler::StoreLoad);
8911 %}
8912 ins_pipe(pipe_slow);
8913 %}
8914
8915 instruct membar_storestore() %{
8916 match(MemBarStoreStore);
8917 match(StoreStoreFence);
8918 ins_cost(0);
8919
8920 size(0);
8921 format %{ "MEMBAR-storestore (empty encoding)" %}
8922 ins_encode( );
8923 ins_pipe(empty);
8924 %}
8925
8926 //----------Move Instructions--------------------------------------------------
8927
8928 instruct castX2P(rRegP dst, rRegL src)
8929 %{
8930 match(Set dst (CastX2P src));
8931
8932 format %{ "movq $dst, $src\t# long->ptr" %}
8933 ins_encode %{
8934 if ($dst$$reg != $src$$reg) {
8935 __ movptr($dst$$Register, $src$$Register);
8936 }
8937 %}
8938 ins_pipe(ialu_reg_reg); // XXX
8939 %}
8940
8941 instruct castP2X(rRegL dst, rRegP src)
8942 %{
8943 match(Set dst (CastP2X src));
8944
8945 format %{ "movq $dst, $src\t# ptr -> long" %}
8946 ins_encode %{
8947 if ($dst$$reg != $src$$reg) {
8948 __ movptr($dst$$Register, $src$$Register);
8949 }
8950 %}
8951 ins_pipe(ialu_reg_reg); // XXX
8952 %}
8953
// Convert oop into int for vector alignment masking
8955 instruct convP2I(rRegI dst, rRegP src)
8956 %{
8957 match(Set dst (ConvL2I (CastP2X src)));
8958
8959 format %{ "movl $dst, $src\t# ptr -> int" %}
8960 ins_encode %{
8961 __ movl($dst$$Register, $src$$Register);
8962 %}
8963 ins_pipe(ialu_reg_reg); // XXX
8964 %}
8965
// Convert compressed oop into int for vector alignment masking
// in the case of 32-bit oops (heap < 4GB).
8968 instruct convN2I(rRegI dst, rRegN src)
8969 %{
8970 predicate(CompressedOops::shift() == 0);
8971 match(Set dst (ConvL2I (CastP2X (DecodeN src))));
8972
8973 format %{ "movl $dst, $src\t# compressed ptr -> int" %}
8974 ins_encode %{
8975 __ movl($dst$$Register, $src$$Register);
8976 %}
8977 ins_pipe(ialu_reg_reg); // XXX
8978 %}
8979
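// Compressed oop encoding subtracts the heap base (when it is non-zero) and
// shifts right by the compression shift; decoding reverses this. The
// arithmetic can clobber the condition codes, which is why these forms
// conservatively declare KILL cr. The *_not_null variants are selected when
// the type system proves the oop is non-null and can omit the null check.
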
8980 // Convert oop pointer into compressed form
8981 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
8982 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
8983 match(Set dst (EncodeP src));
8984 effect(KILL cr);
8985 format %{ "encode_heap_oop $dst,$src" %}
8986 ins_encode %{
8987 Register s = $src$$Register;
8988 Register d = $dst$$Register;
8989 if (s != d) {
8990 __ movq(d, s);
8991 }
8992 __ encode_heap_oop(d);
8993 %}
8994 ins_pipe(ialu_reg_long);
8995 %}
8996
8997 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
8998 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
8999 match(Set dst (EncodeP src));
9000 effect(KILL cr);
9001 format %{ "encode_heap_oop_not_null $dst,$src" %}
9002 ins_encode %{
9003 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9004 %}
9005 ins_pipe(ialu_reg_long);
9006 %}
9007
9008 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
9009 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9010 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9011 match(Set dst (DecodeN src));
9012 effect(KILL cr);
9013 format %{ "decode_heap_oop $dst,$src" %}
9014 ins_encode %{
9015 Register s = $src$$Register;
9016 Register d = $dst$$Register;
9017 if (s != d) {
9018 __ movq(d, s);
9019 }
9020 __ decode_heap_oop(d);
9021 %}
9022 ins_pipe(ialu_reg_long);
9023 %}
9024
9025 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9026 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9027 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9028 match(Set dst (DecodeN src));
9029 effect(KILL cr);
9030 format %{ "decode_heap_oop_not_null $dst,$src" %}
9031 ins_encode %{
9032 Register s = $src$$Register;
9033 Register d = $dst$$Register;
9034 if (s != d) {
9035 __ decode_heap_oop_not_null(d, s);
9036 } else {
9037 __ decode_heap_oop_not_null(d);
9038 }
9039 %}
9040 ins_pipe(ialu_reg_long);
9041 %}
9042
9043 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
9044 match(Set dst (EncodePKlass src));
9045 effect(TEMP dst, KILL cr);
9046 format %{ "encode_and_move_klass_not_null $dst,$src" %}
9047 ins_encode %{
9048 __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
9049 %}
9050 ins_pipe(ialu_reg_long);
9051 %}
9052
9053 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
9054 match(Set dst (DecodeNKlass src));
9055 effect(TEMP dst, KILL cr);
9056 format %{ "decode_and_move_klass_not_null $dst,$src" %}
9057 ins_encode %{
9058 __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
9059 %}
9060 ins_pipe(ialu_reg_long);
9061 %}
9062
9063 //----------Conditional Move---------------------------------------------------
9064 // Jump
9065 // dummy instruction for generating temp registers
9066 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
9067 match(Jump (LShiftL switch_val shift));
9068 ins_cost(350);
9069 predicate(false);
9070 effect(TEMP dest);
9071
9072 format %{ "leaq $dest, [$constantaddress]\n\t"
9073 "jmp [$dest + $switch_val << $shift]\n\t" %}
9074 ins_encode %{
9075 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9076 // to do that and the compiler is using that register as one it can allocate.
9077 // So we build it all by hand.
9078 // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
9079 // ArrayAddress dispatch(table, index);
9080 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
9081 __ lea($dest$$Register, $constantaddress);
9082 __ jmp(dispatch);
9083 %}
9084 ins_pipe(pipe_jmp);
9085 %}
9086
9087 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
9088 match(Jump (AddL (LShiftL switch_val shift) offset));
9089 ins_cost(350);
9090 effect(TEMP dest);
9091
9092 format %{ "leaq $dest, [$constantaddress]\n\t"
9093 "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
9094 ins_encode %{
9095 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9096 // to do that and the compiler is using that register as one it can allocate.
9097 // So we build it all by hand.
9098 // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9099 // ArrayAddress dispatch(table, index);
9100 Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
9101 __ lea($dest$$Register, $constantaddress);
9102 __ jmp(dispatch);
9103 %}
9104 ins_pipe(pipe_jmp);
9105 %}
9106
9107 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
9108 match(Jump switch_val);
9109 ins_cost(350);
9110 effect(TEMP dest);
9111
9112 format %{ "leaq $dest, [$constantaddress]\n\t"
9113 "jmp [$dest + $switch_val]\n\t" %}
9114 ins_encode %{
9115 // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
9116 // to do that and the compiler is using that register as one it can allocate.
9117 // So we build it all by hand.
9118 // Address index(noreg, switch_reg, Address::times_1);
9119 // ArrayAddress dispatch(table, index);
9120 Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
9121 __ lea($dest$$Register, $constantaddress);
9122 __ jmp(dispatch);
9123 %}
9124 ins_pipe(pipe_jmp);
9125 %}
9126
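// When the CMove chooses between the constant 1 and a value the predicate has
// proven to be the constant 0, no conditional move is needed: the result is
// just the condition (or its negation) materialized as 0/1, so a single setcc
// replaces the constant load and the cmov.
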
9127 // Conditional move
9128 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
9129 %{
9130 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9131 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9132
9133 ins_cost(100); // XXX
9134 format %{ "setbn$cop $dst\t# signed, int" %}
9135 ins_encode %{
9136 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9137 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9138 %}
9139 ins_pipe(ialu_reg);
9140 %}
9141
9142 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
9143 %{
9144 predicate(!UseAPX);
9145 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9146
9147 ins_cost(200); // XXX
9148 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9149 ins_encode %{
9150 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9151 %}
9152 ins_pipe(pipe_cmov_reg);
9153 %}
9154
9155 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
9156 %{
9157 predicate(UseAPX);
9158 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9159
9160 ins_cost(200);
9161 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9162 ins_encode %{
9163 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9164 %}
9165 ins_pipe(pipe_cmov_reg);
9166 %}
9167
9168 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
9169 %{
9170 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9171 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9172
9173 ins_cost(100); // XXX
9174 format %{ "setbn$cop $dst\t# unsigned, int" %}
9175 ins_encode %{
9176 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9177 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9178 %}
9179 ins_pipe(ialu_reg);
9180 %}
9181
9182 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
9183 predicate(!UseAPX);
9184 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9185
9186 ins_cost(200); // XXX
9187 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9188 ins_encode %{
9189 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9190 %}
9191 ins_pipe(pipe_cmov_reg);
9192 %}
9193
9194 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
9195 predicate(UseAPX);
9196 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9197
9198 ins_cost(200);
9199 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9200 ins_encode %{
9201 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9202 %}
9203 ins_pipe(pipe_cmov_reg);
9204 %}
9205
9206 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9207 %{
9208 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9209 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9210
9211 ins_cost(100); // XXX
9212 format %{ "setbn$cop $dst\t# unsigned, int" %}
9213 ins_encode %{
9214 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9215 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9216 %}
9217 ins_pipe(ialu_reg);
9218 %}
9219
9220 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9221 %{
9222 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
9223 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9224
9225 ins_cost(100); // XXX
9226 format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
9227 ins_encode %{
9228 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9229 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9230 %}
9231 ins_pipe(ialu_reg);
9232 %}
9233
9234 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9235 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9236
9237 ins_cost(200);
9238 expand %{
9239 cmovI_regU(cop, cr, dst, src);
9240 %}
9241 %}
9242
9243 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
9244 match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
9245
9246 ins_cost(200);
9247 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9248 ins_encode %{
9249 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9250 %}
9251 ins_pipe(pipe_cmov_reg);
9252 %}
9253
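// After an unordered floating-point compare (the UCF flag encodings), the
// parity flag is set when either operand was NaN. A "not equal" test must
// treat the unordered case as not-equal, so the move is performed both on
// parity and on not-equal (cmovp + cmovne). The matching _eq variants obtain
// the equal sense by swapping the CMove inputs, as noted below.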
9254 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9255 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9256 match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
9257
9258 ins_cost(200); // XXX
9259 format %{ "cmovpl $dst, $src\n\t"
9260 "cmovnel $dst, $src" %}
9261 ins_encode %{
9262 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9263 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9264 %}
9265 ins_pipe(pipe_cmov_reg);
9266 %}
9267
9268 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9269 // inputs of the CMove
9270 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
9271 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9272 match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
9273 effect(TEMP dst);
9274
9275 ins_cost(200); // XXX
9276 format %{ "cmovpl $dst, $src\n\t"
9277 "cmovnel $dst, $src" %}
9278 ins_encode %{
9279 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9280 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9281 %}
9282 ins_pipe(pipe_cmov_reg);
9283 %}
9284
9285 // Conditional move
9286 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
9287 predicate(!UseAPX);
9288 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9289
9290 ins_cost(250); // XXX
9291 format %{ "cmovl$cop $dst, $src\t# signed, int" %}
9292 ins_encode %{
9293 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9294 %}
9295 ins_pipe(pipe_cmov_mem);
9296 %}
9297
9298 // Conditional move
9299 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
9300 %{
9301 predicate(UseAPX);
9302 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9303
9304 ins_cost(250);
9305 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
9306 ins_encode %{
9307 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9308 %}
9309 ins_pipe(pipe_cmov_mem);
9310 %}
9311
9312 // Conditional move
9313 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
9314 %{
9315 predicate(!UseAPX);
9316 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9317
9318 ins_cost(250); // XXX
9319 format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
9320 ins_encode %{
9321 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9322 %}
9323 ins_pipe(pipe_cmov_mem);
9324 %}
9325
9326 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
9327 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
9328
9329 ins_cost(250);
9330 expand %{
9331 cmovI_memU(cop, cr, dst, src);
9332 %}
9333 %}
9334
9335 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
9336 %{
9337 predicate(UseAPX);
9338 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9339
9340 ins_cost(250);
9341 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
9342 ins_encode %{
9343 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9344 %}
9345 ins_pipe(pipe_cmov_mem);
9346 %}
9347
9348 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
9349 %{
9350 match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
9351
9352 ins_cost(250);
9353 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
9354 ins_encode %{
9355 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9356 %}
9357 ins_pipe(pipe_cmov_mem);
9358 %}
9359
9360 // Conditional move
9361 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
9362 %{
9363 predicate(!UseAPX);
9364 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9365
9366 ins_cost(200); // XXX
9367 format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
9368 ins_encode %{
9369 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9370 %}
9371 ins_pipe(pipe_cmov_reg);
9372 %}
9373
9374 // Conditional move ndd
9375 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
9376 %{
9377 predicate(UseAPX);
9378 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9379
9380 ins_cost(200);
9381 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
9382 ins_encode %{
9383 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9384 %}
9385 ins_pipe(pipe_cmov_reg);
9386 %}
9387
9388 // Conditional move
9389 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
9390 %{
9391 predicate(!UseAPX);
9392 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9393
9394 ins_cost(200); // XXX
9395 format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
9396 ins_encode %{
9397 __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9398 %}
9399 ins_pipe(pipe_cmov_reg);
9400 %}
9401
9402 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9403 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9404
9405 ins_cost(200);
9406 expand %{
9407 cmovN_regU(cop, cr, dst, src);
9408 %}
9409 %}
9410
9411 // Conditional move ndd
9412 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
9413 %{
9414 predicate(UseAPX);
9415 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9416
9417 ins_cost(200);
9418 format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
9419 ins_encode %{
9420 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9421 %}
9422 ins_pipe(pipe_cmov_reg);
9423 %}
9424
9425 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
9426 match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
9427
9428 ins_cost(200);
9429 format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
9430 ins_encode %{
9431 __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9432 %}
9433 ins_pipe(pipe_cmov_reg);
9434 %}
9435
9436 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9437 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9438 match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
9439
9440 ins_cost(200); // XXX
9441 format %{ "cmovpl $dst, $src\n\t"
9442 "cmovnel $dst, $src" %}
9443 ins_encode %{
9444 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9445 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9446 %}
9447 ins_pipe(pipe_cmov_reg);
9448 %}
9449
9450 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9451 // inputs of the CMove
9452 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
9453 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9454 match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
9455
9456 ins_cost(200); // XXX
9457 format %{ "cmovpl $dst, $src\n\t"
9458 "cmovnel $dst, $src" %}
9459 ins_encode %{
9460 __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
9461 __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
9462 %}
9463 ins_pipe(pipe_cmov_reg);
9464 %}
9465
9466 // Conditional move
9467 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
9468 %{
9469 predicate(!UseAPX);
9470 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9471
9472 ins_cost(200); // XXX
9473 format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
9474 ins_encode %{
9475 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9476 %}
9477 ins_pipe(pipe_cmov_reg); // XXX
9478 %}
9479
9480 // Conditional move ndd
9481 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
9482 %{
9483 predicate(UseAPX);
9484 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9485
9486 ins_cost(200);
9487 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
9488 ins_encode %{
9489 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9490 %}
9491 ins_pipe(pipe_cmov_reg);
9492 %}
9493
9494 // Conditional move
9495 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
9496 %{
9497 predicate(!UseAPX);
9498 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9499
9500 ins_cost(200); // XXX
9501 format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
9502 ins_encode %{
9503 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9504 %}
9505 ins_pipe(pipe_cmov_reg); // XXX
9506 %}
9507
9508 // Conditional move ndd
9509 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
9510 %{
9511 predicate(UseAPX);
9512 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9513
9514 ins_cost(200);
9515 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
9516 ins_encode %{
9517 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9518 %}
9519 ins_pipe(pipe_cmov_reg);
9520 %}
9521
9522 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9523 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9524
9525 ins_cost(200);
9526 expand %{
9527 cmovP_regU(cop, cr, dst, src);
9528 %}
9529 %}
9530
9531 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
9532 match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
9533
9534 ins_cost(200);
9535 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
9536 ins_encode %{
9537 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9538 %}
9539 ins_pipe(pipe_cmov_reg);
9540 %}
9541
9542 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9543 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9544 match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
9545
9546 ins_cost(200); // XXX
9547 format %{ "cmovpq $dst, $src\n\t"
9548 "cmovneq $dst, $src" %}
9549 ins_encode %{
9550 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9551 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9552 %}
9553 ins_pipe(pipe_cmov_reg);
9554 %}
9555
9556 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9557 // inputs of the CMove
9558 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
9559 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9560 match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
9561
9562 ins_cost(200); // XXX
9563 format %{ "cmovpq $dst, $src\n\t"
9564 "cmovneq $dst, $src" %}
9565 ins_encode %{
9566 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9567 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9568 %}
9569 ins_pipe(pipe_cmov_reg);
9570 %}
9571
9572 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
9573 %{
9574 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9575 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9576
9577 ins_cost(100); // XXX
9578 format %{ "setbn$cop $dst\t# signed, long" %}
9579 ins_encode %{
9580 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9581 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9582 %}
9583 ins_pipe(ialu_reg);
9584 %}
9585
9586 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
9587 %{
9588 predicate(!UseAPX);
9589 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9590
9591 ins_cost(200); // XXX
9592 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9593 ins_encode %{
9594 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9595 %}
9596 ins_pipe(pipe_cmov_reg); // XXX
9597 %}
9598
9599 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
9600 %{
9601 predicate(UseAPX);
9602 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9603
9604 ins_cost(200);
9605 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9606 ins_encode %{
9607 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9608 %}
9609 ins_pipe(pipe_cmov_reg);
9610 %}
9611
9612 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
9613 %{
9614 predicate(!UseAPX);
9615 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9616
9617 ins_cost(200); // XXX
9618 format %{ "cmovq$cop $dst, $src\t# signed, long" %}
9619 ins_encode %{
9620 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9621 %}
9622 ins_pipe(pipe_cmov_mem); // XXX
9623 %}
9624
9625 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
9626 %{
9627 predicate(UseAPX);
9628 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9629
9630 ins_cost(200);
9631 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
9632 ins_encode %{
9633 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9634 %}
9635 ins_pipe(pipe_cmov_mem);
9636 %}
9637
9638 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
9639 %{
9640 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9641 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9642
9643 ins_cost(100); // XXX
9644 format %{ "setbn$cop $dst\t# unsigned, long" %}
9645 ins_encode %{
9646 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9647 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9648 %}
9649 ins_pipe(ialu_reg);
9650 %}
9651
9652 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
9653 %{
9654 predicate(!UseAPX);
9655 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9656
9657 ins_cost(200); // XXX
9658 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9659 ins_encode %{
9660 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
9661 %}
9662 ins_pipe(pipe_cmov_reg); // XXX
9663 %}
9664
9665 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
9666 %{
9667 predicate(UseAPX);
9668 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9669
9670 ins_cost(200);
9671 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9672 ins_encode %{
9673 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9674 %}
9675 ins_pipe(pipe_cmov_reg);
9676 %}
9677
9678 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
9679 %{
9680 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9681 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9682
9683 ins_cost(100); // XXX
9684 format %{ "setbn$cop $dst\t# unsigned, long" %}
9685 ins_encode %{
9686 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9687 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9688 %}
9689 ins_pipe(ialu_reg);
9690 %}
9691
9692 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
9693 %{
9694 predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
9695 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9696
9697 ins_cost(100); // XXX
9698 format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
9699 ins_encode %{
9700 Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
9701 __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
9702 %}
9703 ins_pipe(ialu_reg);
9704 %}
9705
9706 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9707 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9708
9709 ins_cost(200);
9710 expand %{
9711 cmovL_regU(cop, cr, dst, src);
9712 %}
9713 %}
9714
9715 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
9716 %{
9717 match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
9718
9719 ins_cost(200);
9720 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9721 ins_encode %{
9722 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
9723 %}
9724 ins_pipe(pipe_cmov_reg);
9725 %}
9726
9727 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9728 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
9729 match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
9730
9731 ins_cost(200); // XXX
9732 format %{ "cmovpq $dst, $src\n\t"
9733 "cmovneq $dst, $src" %}
9734 ins_encode %{
9735 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9736 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9737 %}
9738 ins_pipe(pipe_cmov_reg);
9739 %}
9740
9741 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
9742 // inputs of the CMove
9743 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
9744 predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
9745 match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
9746
9747 ins_cost(200); // XXX
9748 format %{ "cmovpq $dst, $src\n\t"
9749 "cmovneq $dst, $src" %}
9750 ins_encode %{
9751 __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
9752 __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
9753 %}
9754 ins_pipe(pipe_cmov_reg);
9755 %}
9756
9757 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
9758 %{
9759 predicate(!UseAPX);
9760 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9761
9762 ins_cost(200); // XXX
9763 format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
9764 ins_encode %{
9765 __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
9766 %}
9767 ins_pipe(pipe_cmov_mem); // XXX
9768 %}
9769
9770 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
9771 match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
9772
9773 ins_cost(200);
9774 expand %{
9775 cmovL_memU(cop, cr, dst, src);
9776 %}
9777 %}
9778
9779 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
9780 %{
9781 predicate(UseAPX);
9782 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9783
9784 ins_cost(200);
9785 format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
9786 ins_encode %{
9787 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9788 %}
9789 ins_pipe(pipe_cmov_mem);
9790 %}
9791
9792 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
9793 %{
9794 match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
9795
9796 ins_cost(200);
9797 format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
9798 ins_encode %{
9799 __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
9800 %}
9801 ins_pipe(pipe_cmov_mem);
9802 %}
9803
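// There is no SSE conditional move for XMM registers, so CMoveF/CMoveD are
// implemented as a short forward branch on the negated condition around a
// register-to-register copy.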
9804 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
9805 %{
9806 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9807
9808 ins_cost(200); // XXX
9809 format %{ "jn$cop skip\t# signed cmove float\n\t"
9810 "movss $dst, $src\n"
9811 "skip:" %}
9812 ins_encode %{
9813 Label Lskip;
9814 // Invert sense of branch from sense of CMOV
9815 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9816 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9817 __ bind(Lskip);
9818 %}
9819 ins_pipe(pipe_slow);
9820 %}
9821
9822 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
9823 %{
9824 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9825
9826 ins_cost(200); // XXX
9827 format %{ "jn$cop skip\t# unsigned cmove float\n\t"
9828 "movss $dst, $src\n"
9829 "skip:" %}
9830 ins_encode %{
9831 Label Lskip;
9832 // Invert sense of branch from sense of CMOV
9833 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9834 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9835 __ bind(Lskip);
9836 %}
9837 ins_pipe(pipe_slow);
9838 %}
9839
9840 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
9841 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9842
9843 ins_cost(200);
9844 expand %{
9845 cmovF_regU(cop, cr, dst, src);
9846 %}
9847 %}
9848
9849 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
9850 %{
9851 match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
9852
9853 ins_cost(200); // XXX
9854 format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
9855 "movss $dst, $src\n"
9856 "skip:" %}
9857 ins_encode %{
9858 Label Lskip;
9859 // Invert sense of branch from sense of CMOV
9860 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9861 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
9862 __ bind(Lskip);
9863 %}
9864 ins_pipe(pipe_slow);
9865 %}
9866
9867 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
9868 %{
9869 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9870
9871 ins_cost(200); // XXX
9872 format %{ "jn$cop skip\t# signed cmove double\n\t"
9873 "movsd $dst, $src\n"
9874 "skip:" %}
9875 ins_encode %{
9876 Label Lskip;
9877 // Invert sense of branch from sense of CMOV
9878 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9879 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9880 __ bind(Lskip);
9881 %}
9882 ins_pipe(pipe_slow);
9883 %}
9884
9885 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
9886 %{
9887 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9888
9889 ins_cost(200); // XXX
9890 format %{ "jn$cop skip\t# unsigned cmove double\n\t"
9891 "movsd $dst, $src\n"
9892 "skip:" %}
9893 ins_encode %{
9894 Label Lskip;
9895 // Invert sense of branch from sense of CMOV
9896 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9897 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9898 __ bind(Lskip);
9899 %}
9900 ins_pipe(pipe_slow);
9901 %}
9902
9903 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
9904 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9905
9906 ins_cost(200);
9907 expand %{
9908 cmovD_regU(cop, cr, dst, src);
9909 %}
9910 %}
9911
9912 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
9913 %{
9914 match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
9915
9916 ins_cost(200); // XXX
9917 format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
9918 "movsd $dst, $src\n"
9919 "skip:" %}
9920 ins_encode %{
9921 Label Lskip;
9922 // Invert sense of branch from sense of CMOV
9923 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
9924 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
9925 __ bind(Lskip);
9926 %}
9927 ins_pipe(pipe_slow);
9928 %}
9929
9930 //----------Arithmetic Instructions--------------------------------------------
9931 //----------Addition Instructions----------------------------------------------
9932
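// Many ALU patterns below come in two flavors: the legacy two-operand form
// (dst op= src), selected when !UseAPX, and an APX "new data destination"
// (NDD) three-operand form (e.g. eaddl dst, src1, src2), selected when
// UseAPX, which writes a separate destination and so avoids an extra move
// when dst differs from the sources. The Flag_ndd_demotable_opr* hints
// presumably mark operands whose aliasing with dst allows the NDD form to be
// demoted back to the shorter legacy encoding.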
9933 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
9934 %{
9935 predicate(!UseAPX);
9936 match(Set dst (AddI dst src));
9937 effect(KILL cr);
9938 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9939 format %{ "addl $dst, $src\t# int" %}
9940 ins_encode %{
9941 __ addl($dst$$Register, $src$$Register);
9942 %}
9943 ins_pipe(ialu_reg_reg);
9944 %}
9945
9946 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
9947 %{
9948 predicate(UseAPX);
9949 match(Set dst (AddI src1 src2));
9950 effect(KILL cr);
9951 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
9952
9953 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9954 ins_encode %{
9955 __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
9956 %}
9957 ins_pipe(ialu_reg_reg);
9958 %}
9959
9960 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
9961 %{
9962 predicate(!UseAPX);
9963 match(Set dst (AddI dst src));
9964 effect(KILL cr);
9965 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9966
9967 format %{ "addl $dst, $src\t# int" %}
9968 ins_encode %{
9969 __ addl($dst$$Register, $src$$constant);
9970 %}
9971 ins_pipe( ialu_reg );
9972 %}
9973
9974 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
9975 %{
9976 predicate(UseAPX);
9977 match(Set dst (AddI src1 src2));
9978 effect(KILL cr);
9979 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
9980
9981 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9982 ins_encode %{
9983 __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
9984 %}
9985 ins_pipe( ialu_reg );
9986 %}
9987
9988 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
9989 %{
9990 predicate(UseAPX);
9991 match(Set dst (AddI (LoadI src1) src2));
9992 effect(KILL cr);
9993 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
9994
9995 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
9996 ins_encode %{
9997 __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
9998 %}
9999 ins_pipe( ialu_reg );
10000 %}
10001
10002 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10003 %{
10004 predicate(!UseAPX);
10005 match(Set dst (AddI dst (LoadI src)));
10006 effect(KILL cr);
10007 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10008
10009 ins_cost(150); // XXX
10010 format %{ "addl $dst, $src\t# int" %}
10011 ins_encode %{
10012 __ addl($dst$$Register, $src$$Address);
10013 %}
10014 ins_pipe(ialu_reg_mem);
10015 %}
10016
10017 instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
10018 %{
10019 predicate(UseAPX);
10020 match(Set dst (AddI src1 (LoadI src2)));
10021 effect(KILL cr);
10022 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10023
10024 ins_cost(150);
10025 format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
10026 ins_encode %{
10027 __ eaddl($dst$$Register, $src1$$Register, $src2$$Address, false);
10028 %}
10029 ins_pipe(ialu_reg_mem);
10030 %}
10031
10032 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
10033 %{
10034 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10035 effect(KILL cr);
10036 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10037
10038 ins_cost(150); // XXX
10039 format %{ "addl $dst, $src\t# int" %}
10040 ins_encode %{
10041 __ addl($dst$$Address, $src$$Register);
10042 %}
10043 ins_pipe(ialu_mem_reg);
10044 %}
10045
10046 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
10047 %{
10048 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10049 effect(KILL cr);
10050 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10051
10053 ins_cost(125); // XXX
10054 format %{ "addl $dst, $src\t# int" %}
10055 ins_encode %{
10056 __ addl($dst$$Address, $src$$constant);
10057 %}
10058 ins_pipe(ialu_mem_imm);
10059 %}
10060
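// With UseIncDec, adding the constant +1/-1 is matched to incl/decl (or the
// APX eincl/edecl forms), which encode more compactly than addl with an
// immediate. inc/dec leave CF unchanged, which is consistent with these
// patterns not advertising the full set of flag-setting properties that the
// addl patterns declare.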
10061 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
10062 %{
10063 predicate(!UseAPX && UseIncDec);
10064 match(Set dst (AddI dst src));
10065 effect(KILL cr);
10066
10067 format %{ "incl $dst\t# int" %}
10068 ins_encode %{
10069 __ incrementl($dst$$Register);
10070 %}
10071 ins_pipe(ialu_reg);
10072 %}
10073
10074 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
10075 %{
10076 predicate(UseAPX && UseIncDec);
10077 match(Set dst (AddI src val));
10078 effect(KILL cr);
10079 flag(PD::Flag_ndd_demotable_opr1);
10080
10081 format %{ "eincl $dst, $src\t# int ndd" %}
10082 ins_encode %{
10083 __ eincl($dst$$Register, $src$$Register, false);
10084 %}
10085 ins_pipe(ialu_reg);
10086 %}
10087
10088 instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
10089 %{
10090 predicate(UseAPX && UseIncDec);
10091 match(Set dst (AddI (LoadI src) val));
10092 effect(KILL cr);
10093
10094 format %{ "eincl $dst, $src\t# int ndd" %}
10095 ins_encode %{
10096 __ eincl($dst$$Register, $src$$Address, false);
10097 %}
10098 ins_pipe(ialu_reg);
10099 %}
10100
10101 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
10102 %{
10103 predicate(UseIncDec);
10104 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10105 effect(KILL cr);
10106
10107 ins_cost(125); // XXX
10108 format %{ "incl $dst\t# int" %}
10109 ins_encode %{
10110 __ incrementl($dst$$Address);
10111 %}
10112 ins_pipe(ialu_mem_imm);
10113 %}
10114
10115 // XXX why does that use AddI
10116 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10117 %{
10118 predicate(!UseAPX && UseIncDec);
10119 match(Set dst (AddI dst src));
10120 effect(KILL cr);
10121
10122 format %{ "decl $dst\t# int" %}
10123 ins_encode %{
10124 __ decrementl($dst$$Register);
10125 %}
10126 ins_pipe(ialu_reg);
10127 %}
10128
10129 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10130 %{
10131 predicate(UseAPX && UseIncDec);
10132 match(Set dst (AddI src val));
10133 effect(KILL cr);
10134 flag(PD::Flag_ndd_demotable_opr1);
10135
10136 format %{ "edecl $dst, $src\t# int ndd" %}
10137 ins_encode %{
10138 __ edecl($dst$$Register, $src$$Register, false);
10139 %}
10140 ins_pipe(ialu_reg);
10141 %}
10142
10143 instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
10144 %{
10145 predicate(UseAPX && UseIncDec);
10146 match(Set dst (AddI (LoadI src) val));
10147 effect(KILL cr);
10148
10149 format %{ "edecl $dst, $src\t# int ndd" %}
10150 ins_encode %{
10151 __ edecl($dst$$Register, $src$$Address, false);
10152 %}
10153 ins_pipe(ialu_reg);
10154 %}
10155
10156 // XXX why does that use AddI
10157 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10158 %{
10159 predicate(UseIncDec);
10160 match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10161 effect(KILL cr);
10162
10163 ins_cost(125); // XXX
10164 format %{ "decl $dst\t# int" %}
10165 ins_encode %{
10166 __ decrementl($dst$$Address);
10167 %}
10168 ins_pipe(ialu_mem_imm);
10169 %}
10170
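// When the CPU supports fast two- and three-operand LEA, shift-and-add and
// add-with-displacement combinations are matched to a single leal. LEA
// computes the address expression without writing the flags, so these
// patterns need no KILL cr effect.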
10171 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10172 %{
10173 predicate(VM_Version::supports_fast_2op_lea());
10174 match(Set dst (AddI (LShiftI index scale) disp));
10175
10176 format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10177 ins_encode %{
10178 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10179 __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10180 %}
10181 ins_pipe(ialu_reg_reg);
10182 %}
10183
10184 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10185 %{
10186 predicate(VM_Version::supports_fast_3op_lea());
10187 match(Set dst (AddI (AddI base index) disp));
10188
10189 format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10190 ins_encode %{
10191 __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10192 %}
10193 ins_pipe(ialu_reg_reg);
10194 %}
10195
10196 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10197 %{
10198 predicate(VM_Version::supports_fast_2op_lea());
10199 match(Set dst (AddI base (LShiftI index scale)));
10200
10201 format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10202 ins_encode %{
10203 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10204 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10205 %}
10206 ins_pipe(ialu_reg_reg);
10207 %}
10208
10209 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10210 %{
10211 predicate(VM_Version::supports_fast_3op_lea());
10212 match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10213
10214 format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10215 ins_encode %{
10216 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10217 __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10218 %}
10219 ins_pipe(ialu_reg_reg);
10220 %}
10221
10222 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10223 %{
10224 predicate(!UseAPX);
10225 match(Set dst (AddL dst src));
10226 effect(KILL cr);
10227 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10228
10229 format %{ "addq $dst, $src\t# long" %}
10230 ins_encode %{
10231 __ addq($dst$$Register, $src$$Register);
10232 %}
10233 ins_pipe(ialu_reg_reg);
10234 %}
10235
10236 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10237 %{
10238 predicate(UseAPX);
10239 match(Set dst (AddL src1 src2));
10240 effect(KILL cr);
10241 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10242
10243 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10244 ins_encode %{
10245 __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10246 %}
10247 ins_pipe(ialu_reg_reg);
10248 %}
10249
10250 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10251 %{
10252 predicate(!UseAPX);
10253 match(Set dst (AddL dst src));
10254 effect(KILL cr);
10255 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10256
10257 format %{ "addq $dst, $src\t# long" %}
10258 ins_encode %{
10259 __ addq($dst$$Register, $src$$constant);
10260 %}
10261 ins_pipe( ialu_reg );
10262 %}
10263
10264 instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
10265 %{
10266 predicate(UseAPX);
10267 match(Set dst (AddL src1 src2));
10268 effect(KILL cr);
10269 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
10270
10271 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10272 ins_encode %{
10273 __ eaddq($dst$$Register, $src1$$Register, $src2$$constant, false);
10274 %}
10275 ins_pipe( ialu_reg );
10276 %}
10277
10278 instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
10279 %{
10280 predicate(UseAPX);
10281 match(Set dst (AddL (LoadL src1) src2));
10282 effect(KILL cr);
10283 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10284
10285 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10286 ins_encode %{
10287 __ eaddq($dst$$Register, $src1$$Address, $src2$$constant, false);
10288 %}
10289 ins_pipe( ialu_reg );
10290 %}
10291
10292 instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
10293 %{
10294 predicate(!UseAPX);
10295 match(Set dst (AddL dst (LoadL src)));
10296 effect(KILL cr);
10297 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10298
10299 ins_cost(150); // XXX
10300 format %{ "addq $dst, $src\t# long" %}
10301 ins_encode %{
10302 __ addq($dst$$Register, $src$$Address);
10303 %}
10304 ins_pipe(ialu_reg_mem);
10305 %}
10306
10307 instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
10308 %{
10309 predicate(UseAPX);
10310 match(Set dst (AddL src1 (LoadL src2)));
10311 effect(KILL cr);
10312 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10313
10314 ins_cost(150);
10315 format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
10316 ins_encode %{
10317 __ eaddq($dst$$Register, $src1$$Register, $src2$$Address, false);
10318 %}
10319 ins_pipe(ialu_reg_mem);
10320 %}
10321
10322 instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
10323 %{
10324 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10325 effect(KILL cr);
10326 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10327
10328 ins_cost(150); // XXX
10329 format %{ "addq $dst, $src\t# long" %}
10330 ins_encode %{
10331 __ addq($dst$$Address, $src$$Register);
10332 %}
10333 ins_pipe(ialu_mem_reg);
10334 %}
10335
10336 instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
10337 %{
10338 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10339 effect(KILL cr);
10340 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10341
10342 ins_cost(125); // XXX
10343 format %{ "addq $dst, $src\t# long" %}
10344 ins_encode %{
10345 __ addq($dst$$Address, $src$$constant);
10346 %}
10347 ins_pipe(ialu_mem_imm);
10348 %}
10349
10350 instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
10351 %{
10352 predicate(!UseAPX && UseIncDec);
10353 match(Set dst (AddL dst src));
10354 effect(KILL cr);
10355
10356 format %{ "incq $dst\t# long" %}
10357 ins_encode %{
10358 __ incrementq($dst$$Register);
10359 %}
10360 ins_pipe(ialu_reg);
10361 %}
10362
10363 instruct incL_rReg_ndd(rRegL dst, rRegI src, immL1 val, rFlagsReg cr)
10364 %{
10365 predicate(UseAPX && UseIncDec);
10366 match(Set dst (AddL src val));
10367 effect(KILL cr);
10368 flag(PD::Flag_ndd_demotable_opr1);
10369
10370 format %{ "eincq $dst, $src\t# long ndd" %}
10371 ins_encode %{
10372 __ eincq($dst$$Register, $src$$Register, false);
10373 %}
10374 ins_pipe(ialu_reg);
10375 %}
10376
10377 instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
10378 %{
10379 predicate(UseAPX && UseIncDec);
10380 match(Set dst (AddL (LoadL src) val));
10381 effect(KILL cr);
10382
10383 format %{ "eincq $dst, $src\t# long ndd" %}
10384 ins_encode %{
10385 __ eincq($dst$$Register, $src$$Address, false);
10386 %}
10387 ins_pipe(ialu_reg);
10388 %}
10389
10390 instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
10391 %{
10392 predicate(UseIncDec);
10393 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10394 effect(KILL cr);
10395
10396 ins_cost(125); // XXX
10397 format %{ "incq $dst\t# long" %}
10398 ins_encode %{
10399 __ incrementq($dst$$Address);
10400 %}
10401 ins_pipe(ialu_mem_imm);
10402 %}
10403
10404 // XXX why does that use AddL
10405 instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
10406 %{
10407 predicate(!UseAPX && UseIncDec);
10408 match(Set dst (AddL dst src));
10409 effect(KILL cr);
10410
10411 format %{ "decq $dst\t# long" %}
10412 ins_encode %{
10413 __ decrementq($dst$$Register);
10414 %}
10415 ins_pipe(ialu_reg);
10416 %}
10417
10418 instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
10419 %{
10420 predicate(UseAPX && UseIncDec);
10421 match(Set dst (AddL src val));
10422 effect(KILL cr);
10423 flag(PD::Flag_ndd_demotable_opr1);
10424
10425 format %{ "edecq $dst, $src\t# long ndd" %}
10426 ins_encode %{
10427 __ edecq($dst$$Register, $src$$Register, false);
10428 %}
10429 ins_pipe(ialu_reg);
10430 %}
10431
10432 instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
10433 %{
10434 predicate(UseAPX && UseIncDec);
10435 match(Set dst (AddL (LoadL src) val));
10436 effect(KILL cr);
10437
10438 format %{ "edecq $dst, $src\t# long ndd" %}
10439 ins_encode %{
10440 __ edecq($dst$$Register, $src$$Address, false);
10441 %}
10442 ins_pipe(ialu_reg);
10443 %}
10444
10445 // XXX why does that use AddL
10446 instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
10447 %{
10448 predicate(UseIncDec);
10449 match(Set dst (StoreL dst (AddL (LoadL dst) src)));
10450 effect(KILL cr);
10451
10452 ins_cost(125); // XXX
10453 format %{ "decq $dst\t# long" %}
10454 ins_encode %{
10455 __ decrementq($dst$$Address);
10456 %}
10457 ins_pipe(ialu_mem_imm);
10458 %}
10459
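// The lea forms below fold an add/shift chain into a single LEA. They are
// predicated on VM_Version::supports_fast_2op_lea()/supports_fast_3op_lea()
// because LEA with two or three address components is slower than discrete
// adds on some microarchitectures.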
10460 instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
10461 %{
10462 predicate(VM_Version::supports_fast_2op_lea());
10463 match(Set dst (AddL (LShiftL index scale) disp));
10464
10465 format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
10466 ins_encode %{
10467 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10468 __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10469 %}
10470 ins_pipe(ialu_reg_reg);
10471 %}
10472
10473 instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
10474 %{
10475 predicate(VM_Version::supports_fast_3op_lea());
10476 match(Set dst (AddL (AddL base index) disp));
10477
10478 format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
10479 ins_encode %{
10480 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10481 %}
10482 ins_pipe(ialu_reg_reg);
10483 %}
10484
10485 instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
10486 %{
10487 predicate(VM_Version::supports_fast_2op_lea());
10488 match(Set dst (AddL base (LShiftL index scale)));
10489
10490 format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
10491 ins_encode %{
10492 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10493 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
10494 %}
10495 ins_pipe(ialu_reg_reg);
10496 %}
10497
10498 instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
10499 %{
10500 predicate(VM_Version::supports_fast_3op_lea());
10501 match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
10502
10503 format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
10504 ins_encode %{
10505 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10506 __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10507 %}
10508 ins_pipe(ialu_reg_reg);
10509 %}
10510
10511 instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
10512 %{
10513 match(Set dst (AddP dst src));
10514 effect(KILL cr);
10515 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10516
10517 format %{ "addq $dst, $src\t# ptr" %}
10518 ins_encode %{
10519 __ addq($dst$$Register, $src$$Register);
10520 %}
10521 ins_pipe(ialu_reg_reg);
10522 %}
10523
10524 instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
10525 %{
10526 match(Set dst (AddP dst src));
10527 effect(KILL cr);
10528 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10529
10530 format %{ "addq $dst, $src\t# ptr" %}
10531 ins_encode %{
10532 __ addq($dst$$Register, $src$$constant);
10533 %}
10534 ins_pipe( ialu_reg );
10535 %}
10536
10537 // XXX addP mem ops ????
10538
10539 instruct checkCastPP(rRegP dst)
10540 %{
10541 match(Set dst (CheckCastPP dst));
10542
10543 size(0);
10544 format %{ "# checkcastPP of $dst" %}
10545 ins_encode(/* empty encoding */);
10546 ins_pipe(empty);
10547 %}
10548
10549 instruct castPP(rRegP dst)
10550 %{
10551 match(Set dst (CastPP dst));
10552
10553 size(0);
10554 format %{ "# castPP of $dst" %}
10555 ins_encode(/* empty encoding */);
10556 ins_pipe(empty);
10557 %}
10558
10559 instruct castII(rRegI dst)
10560 %{
10561 predicate(VerifyConstraintCasts == 0);
10562 match(Set dst (CastII dst));
10563
10564 size(0);
10565 format %{ "# castII of $dst" %}
10566 ins_encode(/* empty encoding */);
10567 ins_cost(0);
10568 ins_pipe(empty);
10569 %}
10570
10571 instruct castII_checked(rRegI dst, rFlagsReg cr)
10572 %{
10573 predicate(VerifyConstraintCasts > 0);
10574 match(Set dst (CastII dst));
10575
10576 effect(KILL cr);
10577 format %{ "# cast_checked_II $dst" %}
10578 ins_encode %{
10579 __ verify_int_in_range(_idx, bottom_type()->is_int(), $dst$$Register);
10580 %}
10581 ins_pipe(pipe_slow);
10582 %}
10583
10584 instruct castLL(rRegL dst)
10585 %{
10586 predicate(VerifyConstraintCasts == 0);
10587 match(Set dst (CastLL dst));
10588
10589 size(0);
10590 format %{ "# castLL of $dst" %}
10591 ins_encode(/* empty encoding */);
10592 ins_cost(0);
10593 ins_pipe(empty);
10594 %}
10595
10596 instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
10597 %{
10598 predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
10599 match(Set dst (CastLL dst));
10600
10601 effect(KILL cr);
10602 format %{ "# cast_checked_LL $dst" %}
10603 ins_encode %{
10604 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, noreg);
10605 %}
10606 ins_pipe(pipe_slow);
10607 %}
10608
10609 instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
10610 %{
10611 predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
10612 match(Set dst (CastLL dst));
10613
10614 effect(KILL cr, TEMP tmp);
10615 format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
10616 ins_encode %{
10617 __ verify_long_in_range(_idx, bottom_type()->is_long(), $dst$$Register, $tmp$$Register);
10618 %}
10619 ins_pipe(pipe_slow);
10620 %}
10621
10622 instruct castFF(regF dst)
10623 %{
10624 match(Set dst (CastFF dst));
10625
10626 size(0);
10627 format %{ "# castFF of $dst" %}
10628 ins_encode(/* empty encoding */);
10629 ins_cost(0);
10630 ins_pipe(empty);
10631 %}
10632
10633 instruct castHH(regF dst)
10634 %{
10635 match(Set dst (CastHH dst));
10636
10637 size(0);
10638 format %{ "# castHH of $dst" %}
10639 ins_encode(/* empty encoding */);
10640 ins_cost(0);
10641 ins_pipe(empty);
10642 %}
10643
10644 instruct castDD(regD dst)
10645 %{
10646 match(Set dst (CastDD dst));
10647
10648 size(0);
10649 format %{ "# castDD of $dst" %}
10650 ins_encode(/* empty encoding */);
10651 ins_cost(0);
10652 ins_pipe(empty);
10653 %}
10654
10655 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
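// CMPXCHG fixes the expected value in RAX, which is why oldval is constrained
// to rax_Reg* and KILLed: LOCK CMPXCHG stores $newval into $mem_ptr only when
// [$mem_ptr] == RAX, and setcc turns the resulting ZF into the int result.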
10656 instruct compareAndSwapP(rRegI res,
10657 memory mem_ptr,
10658 rax_RegP oldval, rRegP newval,
10659 rFlagsReg cr)
10660 %{
10661 predicate(n->as_LoadStore()->barrier_data() == 0);
10662 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
10663 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
10664 effect(KILL cr, KILL oldval);
10665
10666 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10667 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10668 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10669 ins_encode %{
10670 __ lock();
10671 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10672 __ setcc(Assembler::equal, $res$$Register);
10673 %}
10674 ins_pipe( pipe_cmpxchg );
10675 %}
10676
10677 instruct compareAndSwapL(rRegI res,
10678 memory mem_ptr,
10679 rax_RegL oldval, rRegL newval,
10680 rFlagsReg cr)
10681 %{
10682 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10683 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10684 effect(KILL cr, KILL oldval);
10685
10686 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10687 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10688 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10689 ins_encode %{
10690 __ lock();
10691 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10692 __ setcc(Assembler::equal, $res$$Register);
10693 %}
10694 ins_pipe( pipe_cmpxchg );
10695 %}
10696
10697 instruct compareAndSwapI(rRegI res,
10698 memory mem_ptr,
10699 rax_RegI oldval, rRegI newval,
10700 rFlagsReg cr)
10701 %{
10702 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10703 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10704 effect(KILL cr, KILL oldval);
10705
10706 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10707 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10708 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10709 ins_encode %{
10710 __ lock();
10711 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10712 __ setcc(Assembler::equal, $res$$Register);
10713 %}
10714 ins_pipe( pipe_cmpxchg );
10715 %}
10716
10717 instruct compareAndSwapB(rRegI res,
10718 memory mem_ptr,
10719 rax_RegI oldval, rRegI newval,
10720 rFlagsReg cr)
10721 %{
10722 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10723 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10724 effect(KILL cr, KILL oldval);
10725
10726 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10727 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10728 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10729 ins_encode %{
10730 __ lock();
10731 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10732 __ setcc(Assembler::equal, $res$$Register);
10733 %}
10734 ins_pipe( pipe_cmpxchg );
10735 %}
10736
10737 instruct compareAndSwapS(rRegI res,
10738 memory mem_ptr,
10739 rax_RegI oldval, rRegI newval,
10740 rFlagsReg cr)
10741 %{
10742 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10743 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10744 effect(KILL cr, KILL oldval);
10745
10746 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10747 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10748 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10749 ins_encode %{
10750 __ lock();
10751 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10752 __ setcc(Assembler::equal, $res$$Register);
10753 %}
10754 ins_pipe( pipe_cmpxchg );
10755 %}
10756
10757 instruct compareAndSwapN(rRegI res,
10758 memory mem_ptr,
10759 rax_RegN oldval, rRegN newval,
10760 rFlagsReg cr) %{
10761 predicate(n->as_LoadStore()->barrier_data() == 0);
10762 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10763 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10764 effect(KILL cr, KILL oldval);
10765
10766 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10767 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10768 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10769 ins_encode %{
10770 __ lock();
10771 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10772 __ setcc(Assembler::equal, $res$$Register);
10773 %}
10774 ins_pipe( pipe_cmpxchg );
10775 %}
10776
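// CompareAndExchange returns the value witnessed in memory rather than a
// boolean: CMPXCHG leaves that value in RAX, so oldval doubles as the result
// and no setcc is needed.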
10777 instruct compareAndExchangeB(
10778 memory mem_ptr,
10779 rax_RegI oldval, rRegI newval,
10780 rFlagsReg cr)
10781 %{
10782 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10783 effect(KILL cr);
10784
10785 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10786 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10787 ins_encode %{
10788 __ lock();
10789 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10790 %}
10791 ins_pipe( pipe_cmpxchg );
10792 %}
10793
10794 instruct compareAndExchangeS(
10795 memory mem_ptr,
10796 rax_RegI oldval, rRegI newval,
10797 rFlagsReg cr)
10798 %{
10799 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10800 effect(KILL cr);
10801
10802 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10803 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10804 ins_encode %{
10805 __ lock();
10806 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10807 %}
10808 ins_pipe( pipe_cmpxchg );
10809 %}
10810
10811 instruct compareAndExchangeI(
10812 memory mem_ptr,
10813 rax_RegI oldval, rRegI newval,
10814 rFlagsReg cr)
10815 %{
10816 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10817 effect(KILL cr);
10818
10819 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10820 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10821 ins_encode %{
10822 __ lock();
10823 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10824 %}
10825 ins_pipe( pipe_cmpxchg );
10826 %}
10827
10828 instruct compareAndExchangeL(
10829 memory mem_ptr,
10830 rax_RegL oldval, rRegL newval,
10831 rFlagsReg cr)
10832 %{
10833 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10834 effect(KILL cr);
10835
10836 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10837 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10838 ins_encode %{
10839 __ lock();
10840 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10841 %}
10842 ins_pipe( pipe_cmpxchg );
10843 %}
10844
10845 instruct compareAndExchangeN(
10846 memory mem_ptr,
10847 rax_RegN oldval, rRegN newval,
10848 rFlagsReg cr) %{
10849 predicate(n->as_LoadStore()->barrier_data() == 0);
10850 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10851 effect(KILL cr);
10852
10853 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10854 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10855 ins_encode %{
10856 __ lock();
10857 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10858 %}
10859 ins_pipe( pipe_cmpxchg );
10860 %}
10861
10862 instruct compareAndExchangeP(
10863 memory mem_ptr,
10864 rax_RegP oldval, rRegP newval,
10865 rFlagsReg cr)
10866 %{
10867 predicate(n->as_LoadStore()->barrier_data() == 0);
10868 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10869 effect(KILL cr);
10870
10871 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10872 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10873 ins_encode %{
10874 __ lock();
10875 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10876 %}
10877 ins_pipe( pipe_cmpxchg );
10878 %}
10879
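// GetAndAdd: when the previous value is not consumed (result_not_used()) a
// plain LOCK ADD is sufficient; only the forms that return the old value need
// LOCK XADD.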
10880 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10881 predicate(n->as_LoadStore()->result_not_used());
10882 match(Set dummy (GetAndAddB mem add));
10883 effect(KILL cr);
10884 format %{ "addb_lock $mem, $add" %}
10885 ins_encode %{
10886 __ lock();
10887 __ addb($mem$$Address, $add$$Register);
10888 %}
10889 ins_pipe(pipe_cmpxchg);
10890 %}
10891
10892 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10893 predicate(n->as_LoadStore()->result_not_used());
10894 match(Set dummy (GetAndAddB mem add));
10895 effect(KILL cr);
10896 format %{ "addb_lock $mem, $add" %}
10897 ins_encode %{
10898 __ lock();
10899 __ addb($mem$$Address, $add$$constant);
10900 %}
10901 ins_pipe(pipe_cmpxchg);
10902 %}
10903
10904 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10905 predicate(!n->as_LoadStore()->result_not_used());
10906 match(Set newval (GetAndAddB mem newval));
10907 effect(KILL cr);
10908 format %{ "xaddb_lock $mem, $newval" %}
10909 ins_encode %{
10910 __ lock();
10911 __ xaddb($mem$$Address, $newval$$Register);
10912 %}
10913 ins_pipe(pipe_cmpxchg);
10914 %}
10915
10916 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10917 predicate(n->as_LoadStore()->result_not_used());
10918 match(Set dummy (GetAndAddS mem add));
10919 effect(KILL cr);
10920 format %{ "addw_lock $mem, $add" %}
10921 ins_encode %{
10922 __ lock();
10923 __ addw($mem$$Address, $add$$Register);
10924 %}
10925 ins_pipe(pipe_cmpxchg);
10926 %}
10927
10928 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10929 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10930 match(Set dummy (GetAndAddS mem add));
10931 effect(KILL cr);
10932 format %{ "addw_lock $mem, $add" %}
10933 ins_encode %{
10934 __ lock();
10935 __ addw($mem$$Address, $add$$constant);
10936 %}
10937 ins_pipe(pipe_cmpxchg);
10938 %}
10939
10940 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10941 predicate(!n->as_LoadStore()->result_not_used());
10942 match(Set newval (GetAndAddS mem newval));
10943 effect(KILL cr);
10944 format %{ "xaddw_lock $mem, $newval" %}
10945 ins_encode %{
10946 __ lock();
10947 __ xaddw($mem$$Address, $newval$$Register);
10948 %}
10949 ins_pipe(pipe_cmpxchg);
10950 %}
10951
10952 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10953 predicate(n->as_LoadStore()->result_not_used());
10954 match(Set dummy (GetAndAddI mem add));
10955 effect(KILL cr);
10956 format %{ "addl_lock $mem, $add" %}
10957 ins_encode %{
10958 __ lock();
10959 __ addl($mem$$Address, $add$$Register);
10960 %}
10961 ins_pipe(pipe_cmpxchg);
10962 %}
10963
10964 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10965 predicate(n->as_LoadStore()->result_not_used());
10966 match(Set dummy (GetAndAddI mem add));
10967 effect(KILL cr);
10968 format %{ "addl_lock $mem, $add" %}
10969 ins_encode %{
10970 __ lock();
10971 __ addl($mem$$Address, $add$$constant);
10972 %}
10973 ins_pipe(pipe_cmpxchg);
10974 %}
10975
10976 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10977 predicate(!n->as_LoadStore()->result_not_used());
10978 match(Set newval (GetAndAddI mem newval));
10979 effect(KILL cr);
10980 format %{ "xaddl_lock $mem, $newval" %}
10981 ins_encode %{
10982 __ lock();
10983 __ xaddl($mem$$Address, $newval$$Register);
10984 %}
10985 ins_pipe(pipe_cmpxchg);
10986 %}
10987
10988 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10989 predicate(n->as_LoadStore()->result_not_used());
10990 match(Set dummy (GetAndAddL mem add));
10991 effect(KILL cr);
10992 format %{ "addq_lock $mem, $add" %}
10993 ins_encode %{
10994 __ lock();
10995 __ addq($mem$$Address, $add$$Register);
10996 %}
10997 ins_pipe(pipe_cmpxchg);
10998 %}
10999
11000 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
11001 predicate(n->as_LoadStore()->result_not_used());
11002 match(Set dummy (GetAndAddL mem add));
11003 effect(KILL cr);
11004 format %{ "addq_lock $mem, $add" %}
11005 ins_encode %{
11006 __ lock();
11007 __ addq($mem$$Address, $add$$constant);
11008 %}
11009 ins_pipe(pipe_cmpxchg);
11010 %}
11011
11012 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11013 predicate(!n->as_LoadStore()->result_not_used());
11014 match(Set newval (GetAndAddL mem newval));
11015 effect(KILL cr);
11016 format %{ "xaddq_lock $mem, $newval" %}
11017 ins_encode %{
11018 __ lock();
11019 __ xaddq($mem$$Address, $newval$$Register);
11020 %}
11021 ins_pipe(pipe_cmpxchg);
11022 %}
11023
11024 instruct xchgB( memory mem, rRegI newval) %{
11025 match(Set newval (GetAndSetB mem newval));
11026 format %{ "XCHGB $newval,[$mem]" %}
11027 ins_encode %{
11028 __ xchgb($newval$$Register, $mem$$Address);
11029 %}
11030 ins_pipe( pipe_cmpxchg );
11031 %}
11032
11033 instruct xchgS( memory mem, rRegI newval) %{
11034 match(Set newval (GetAndSetS mem newval));
11035 format %{ "XCHGW $newval,[$mem]" %}
11036 ins_encode %{
11037 __ xchgw($newval$$Register, $mem$$Address);
11038 %}
11039 ins_pipe( pipe_cmpxchg );
11040 %}
11041
11042 instruct xchgI( memory mem, rRegI newval) %{
11043 match(Set newval (GetAndSetI mem newval));
11044 format %{ "XCHGL $newval,[$mem]" %}
11045 ins_encode %{
11046 __ xchgl($newval$$Register, $mem$$Address);
11047 %}
11048 ins_pipe( pipe_cmpxchg );
11049 %}
11050
11051 instruct xchgL( memory mem, rRegL newval) %{
11052 match(Set newval (GetAndSetL mem newval));
11053 format %{ "XCHGL $newval,[$mem]" %}
11054 ins_encode %{
11055 __ xchgq($newval$$Register, $mem$$Address);
11056 %}
11057 ins_pipe( pipe_cmpxchg );
11058 %}
11059
11060 instruct xchgP( memory mem, rRegP newval) %{
11061 match(Set newval (GetAndSetP mem newval));
11062 predicate(n->as_LoadStore()->barrier_data() == 0);
11063 format %{ "XCHGQ $newval,[$mem]" %}
11064 ins_encode %{
11065 __ xchgq($newval$$Register, $mem$$Address);
11066 %}
11067 ins_pipe( pipe_cmpxchg );
11068 %}
11069
11070 instruct xchgN( memory mem, rRegN newval) %{
11071 predicate(n->as_LoadStore()->barrier_data() == 0);
11072 match(Set newval (GetAndSetN mem newval));
11073 format %{ "XCHGL $newval,$mem]" %}
11074 ins_encode %{
11075 __ xchgl($newval$$Register, $mem$$Address);
11076 %}
11077 ins_pipe( pipe_cmpxchg );
11078 %}
11079
11080 //----------Abs Instructions-------------------------------------------
11081
11082 // Integer Absolute Instructions
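// Branchless abs: with $dst zeroed, the subtract computes -$src and sets the
// flags as for comparing 0 with $src; the cmov on "less" (0 < $src) then
// replaces $dst with $src, leaving |$src| in $dst either way.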
11083 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11084 %{
11085 match(Set dst (AbsI src));
11086 effect(TEMP dst, KILL cr);
11087 format %{ "xorl $dst, $dst\t# abs int\n\t"
11088 "subl $dst, $src\n\t"
11089 "cmovll $dst, $src" %}
11090 ins_encode %{
11091 __ xorl($dst$$Register, $dst$$Register);
11092 __ subl($dst$$Register, $src$$Register);
11093 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11094 %}
11095
11096 ins_pipe(ialu_reg_reg);
11097 %}
11098
11099 // Long Absolute Instructions
11100 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11101 %{
11102 match(Set dst (AbsL src));
11103 effect(TEMP dst, KILL cr);
11104 format %{ "xorl $dst, $dst\t# abs long\n\t"
11105 "subq $dst, $src\n\t"
11106 "cmovlq $dst, $src" %}
11107 ins_encode %{
11108 __ xorl($dst$$Register, $dst$$Register);
11109 __ subq($dst$$Register, $src$$Register);
11110 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11111 %}
11112
11113 ins_pipe(ialu_reg_reg);
11114 %}
11115
11116 //----------Subtraction Instructions-------------------------------------------
11117
11118 // Integer Subtraction Instructions
11119 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11120 %{
11121 predicate(!UseAPX);
11122 match(Set dst (SubI dst src));
11123 effect(KILL cr);
11124 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11125
11126 format %{ "subl $dst, $src\t# int" %}
11127 ins_encode %{
11128 __ subl($dst$$Register, $src$$Register);
11129 %}
11130 ins_pipe(ialu_reg_reg);
11131 %}
11132
11133 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11134 %{
11135 predicate(UseAPX);
11136 match(Set dst (SubI src1 src2));
11137 effect(KILL cr);
11138 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11139
11140 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11141 ins_encode %{
11142 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11143 %}
11144 ins_pipe(ialu_reg_reg);
11145 %}
11146
11147 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11148 %{
11149 predicate(UseAPX);
11150 match(Set dst (SubI src1 src2));
11151 effect(KILL cr);
11152 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11153
11154 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11155 ins_encode %{
11156 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11157 %}
11158 ins_pipe(ialu_reg_reg);
11159 %}
11160
11161 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11162 %{
11163 predicate(UseAPX);
11164 match(Set dst (SubI (LoadI src1) src2));
11165 effect(KILL cr);
11166 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11167
11168 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11169 ins_encode %{
11170 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11171 %}
11172 ins_pipe(ialu_reg_reg);
11173 %}
11174
11175 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11176 %{
11177 predicate(!UseAPX);
11178 match(Set dst (SubI dst (LoadI src)));
11179 effect(KILL cr);
11180 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11181
11182 ins_cost(150);
11183 format %{ "subl $dst, $src\t# int" %}
11184 ins_encode %{
11185 __ subl($dst$$Register, $src$$Address);
11186 %}
11187 ins_pipe(ialu_reg_mem);
11188 %}
11189
11190 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11191 %{
11192 predicate(UseAPX);
11193 match(Set dst (SubI src1 (LoadI src2)));
11194 effect(KILL cr);
11195 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11196
11197 ins_cost(150);
11198 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11199 ins_encode %{
11200 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11201 %}
11202 ins_pipe(ialu_reg_mem);
11203 %}
11204
11205 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11206 %{
11207 predicate(UseAPX);
11208 match(Set dst (SubI (LoadI src1) src2));
11209 effect(KILL cr);
11210 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11211
11212 ins_cost(150);
11213 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11214 ins_encode %{
11215 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11216 %}
11217 ins_pipe(ialu_reg_mem);
11218 %}
11219
11220 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11221 %{
11222 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11223 effect(KILL cr);
11224 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11225
11226 ins_cost(150);
11227 format %{ "subl $dst, $src\t# int" %}
11228 ins_encode %{
11229 __ subl($dst$$Address, $src$$Register);
11230 %}
11231 ins_pipe(ialu_mem_reg);
11232 %}
11233
11234 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11235 %{
11236 predicate(!UseAPX);
11237 match(Set dst (SubL dst src));
11238 effect(KILL cr);
11239 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11240
11241 format %{ "subq $dst, $src\t# long" %}
11242 ins_encode %{
11243 __ subq($dst$$Register, $src$$Register);
11244 %}
11245 ins_pipe(ialu_reg_reg);
11246 %}
11247
11248 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11249 %{
11250 predicate(UseAPX);
11251 match(Set dst (SubL src1 src2));
11252 effect(KILL cr);
11253 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11254
11255 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11256 ins_encode %{
11257 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11258 %}
11259 ins_pipe(ialu_reg_reg);
11260 %}
11261
11262 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11263 %{
11264 predicate(UseAPX);
11265 match(Set dst (SubL src1 src2));
11266 effect(KILL cr);
11267 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11268
11269 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11270 ins_encode %{
11271 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11272 %}
11273 ins_pipe(ialu_reg_reg);
11274 %}
11275
11276 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11277 %{
11278 predicate(UseAPX);
11279 match(Set dst (SubL (LoadL src1) src2));
11280 effect(KILL cr);
11281 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11282
11283 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11284 ins_encode %{
11285 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11286 %}
11287 ins_pipe(ialu_reg_reg);
11288 %}
11289
11290 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11291 %{
11292 predicate(!UseAPX);
11293 match(Set dst (SubL dst (LoadL src)));
11294 effect(KILL cr);
11295 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11296
11297 ins_cost(150);
11298 format %{ "subq $dst, $src\t# long" %}
11299 ins_encode %{
11300 __ subq($dst$$Register, $src$$Address);
11301 %}
11302 ins_pipe(ialu_reg_mem);
11303 %}
11304
11305 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11306 %{
11307 predicate(UseAPX);
11308 match(Set dst (SubL src1 (LoadL src2)));
11309 effect(KILL cr);
11310 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11311
11312 ins_cost(150);
11313 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11314 ins_encode %{
11315 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11316 %}
11317 ins_pipe(ialu_reg_mem);
11318 %}
11319
11320 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11321 %{
11322 predicate(UseAPX);
11323 match(Set dst (SubL (LoadL src1) src2));
11324 effect(KILL cr);
11325 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11326
11327 ins_cost(150);
11328 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11329 ins_encode %{
11330 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11331 %}
11332 ins_pipe(ialu_reg_mem);
11333 %}
11334
11335 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11336 %{
11337 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11338 effect(KILL cr);
11339 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11340
11341 ins_cost(150);
11342 format %{ "subq $dst, $src\t# long" %}
11343 ins_encode %{
11344 __ subq($dst$$Address, $src$$Register);
11345 %}
11346 ins_pipe(ialu_mem_reg);
11347 %}
11348
11349 // Subtract from a pointer
11350 // XXX hmpf???
11351 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11352 %{
11353 match(Set dst (AddP dst (SubI zero src)));
11354 effect(KILL cr);
11355
11356 format %{ "subq $dst, $src\t# ptr - int" %}
11357 ins_encode %{
11358 __ subq($dst$$Register, $src$$Register);
11359 %}
11360 ins_pipe(ialu_reg_reg);
11361 %}
11362
11363 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11364 %{
11365 predicate(!UseAPX);
11366 match(Set dst (SubI zero dst));
11367 effect(KILL cr);
11368 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11369
11370 format %{ "negl $dst\t# int" %}
11371 ins_encode %{
11372 __ negl($dst$$Register);
11373 %}
11374 ins_pipe(ialu_reg);
11375 %}
11376
11377 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11378 %{
11379 predicate(UseAPX);
11380 match(Set dst (SubI zero src));
11381 effect(KILL cr);
11382 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11383
11384 format %{ "enegl $dst, $src\t# int ndd" %}
11385 ins_encode %{
11386 __ enegl($dst$$Register, $src$$Register, false);
11387 %}
11388 ins_pipe(ialu_reg);
11389 %}
11390
11391 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11392 %{
11393 predicate(!UseAPX);
11394 match(Set dst (NegI dst));
11395 effect(KILL cr);
11396 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11397
11398 format %{ "negl $dst\t# int" %}
11399 ins_encode %{
11400 __ negl($dst$$Register);
11401 %}
11402 ins_pipe(ialu_reg);
11403 %}
11404
11405 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11406 %{
11407 predicate(UseAPX);
11408 match(Set dst (NegI src));
11409 effect(KILL cr);
11410 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11411
11412 format %{ "enegl $dst, $src\t# int ndd" %}
11413 ins_encode %{
11414 __ enegl($dst$$Register, $src$$Register, false);
11415 %}
11416 ins_pipe(ialu_reg);
11417 %}
11418
11419 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11420 %{
11421 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11422 effect(KILL cr);
11423 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11424
11425 format %{ "negl $dst\t# int" %}
11426 ins_encode %{
11427 __ negl($dst$$Address);
11428 %}
11429 ins_pipe(ialu_reg);
11430 %}
11431
11432 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11433 %{
11434 predicate(!UseAPX);
11435 match(Set dst (SubL zero dst));
11436 effect(KILL cr);
11437 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11438
11439 format %{ "negq $dst\t# long" %}
11440 ins_encode %{
11441 __ negq($dst$$Register);
11442 %}
11443 ins_pipe(ialu_reg);
11444 %}
11445
11446 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11447 %{
11448 predicate(UseAPX);
11449 match(Set dst (SubL zero src));
11450 effect(KILL cr);
11451 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11452
11453 format %{ "enegq $dst, $src\t# long ndd" %}
11454 ins_encode %{
11455 __ enegq($dst$$Register, $src$$Register, false);
11456 %}
11457 ins_pipe(ialu_reg);
11458 %}
11459
11460 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11461 %{
11462 predicate(!UseAPX);
11463 match(Set dst (NegL dst));
11464 effect(KILL cr);
11465 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11466
11467 format %{ "negq $dst\t# int" %}
11468 ins_encode %{
11469 __ negq($dst$$Register);
11470 %}
11471 ins_pipe(ialu_reg);
11472 %}
11473
11474 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11475 %{
11476 predicate(UseAPX);
11477 match(Set dst (NegL src));
11478 effect(KILL cr);
11479 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11480
11481 format %{ "enegq $dst, $src\t# long ndd" %}
11482 ins_encode %{
11483 __ enegq($dst$$Register, $src$$Register, false);
11484 %}
11485 ins_pipe(ialu_reg);
11486 %}
11487
11488 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11489 %{
11490 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11491 effect(KILL cr);
11492 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11493
11494 format %{ "negq $dst\t# long" %}
11495 ins_encode %{
11496 __ negq($dst$$Address);
11497 %}
11498 ins_pipe(ialu_reg);
11499 %}
11500
11501 //----------Multiplication/Division Instructions-------------------------------
11502 // Integer Multiplication Instructions
11503 // Multiply Register
11504
11505 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11506 %{
11507 predicate(!UseAPX);
11508 match(Set dst (MulI dst src));
11509 effect(KILL cr);
11510
11511 ins_cost(300);
11512 format %{ "imull $dst, $src\t# int" %}
11513 ins_encode %{
11514 __ imull($dst$$Register, $src$$Register);
11515 %}
11516 ins_pipe(ialu_reg_reg_alu0);
11517 %}
11518
11519 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11520 %{
11521 predicate(UseAPX);
11522 match(Set dst (MulI src1 src2));
11523 effect(KILL cr);
11524 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11525
11526 ins_cost(300);
11527 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11528 ins_encode %{
11529 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11530 %}
11531 ins_pipe(ialu_reg_reg_alu0);
11532 %}
11533
11534 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11535 %{
11536 match(Set dst (MulI src imm));
11537 effect(KILL cr);
11538
11539 ins_cost(300);
11540 format %{ "imull $dst, $src, $imm\t# int" %}
11541 ins_encode %{
11542 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11543 %}
11544 ins_pipe(ialu_reg_reg_alu0);
11545 %}
11546
11547 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11548 %{
11549 predicate(!UseAPX);
11550 match(Set dst (MulI dst (LoadI src)));
11551 effect(KILL cr);
11552
11553 ins_cost(350);
11554 format %{ "imull $dst, $src\t# int" %}
11555 ins_encode %{
11556 __ imull($dst$$Register, $src$$Address);
11557 %}
11558 ins_pipe(ialu_reg_mem_alu0);
11559 %}
11560
11561 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11562 %{
11563 predicate(UseAPX);
11564 match(Set dst (MulI src1 (LoadI src2)));
11565 effect(KILL cr);
11566 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11567
11568 ins_cost(350);
11569 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11570 ins_encode %{
11571 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11572 %}
11573 ins_pipe(ialu_reg_mem_alu0);
11574 %}
11575
11576 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11577 %{
11578 match(Set dst (MulI (LoadI src) imm));
11579 effect(KILL cr);
11580
11581 ins_cost(300);
11582 format %{ "imull $dst, $src, $imm\t# int" %}
11583 ins_encode %{
11584 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11585 %}
11586 ins_pipe(ialu_reg_mem_alu0);
11587 %}
11588
11589 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11590 %{
11591 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11592 effect(KILL cr, KILL src2);
11593
11594 expand %{ mulI_rReg(dst, src1, cr);
11595 mulI_rReg(src2, src3, cr);
11596 addI_rReg(dst, src2, cr); %}
11597 %}
11598
11599 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11600 %{
11601 predicate(!UseAPX);
11602 match(Set dst (MulL dst src));
11603 effect(KILL cr);
11604
11605 ins_cost(300);
11606 format %{ "imulq $dst, $src\t# long" %}
11607 ins_encode %{
11608 __ imulq($dst$$Register, $src$$Register);
11609 %}
11610 ins_pipe(ialu_reg_reg_alu0);
11611 %}
11612
11613 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11614 %{
11615 predicate(UseAPX);
11616 match(Set dst (MulL src1 src2));
11617 effect(KILL cr);
11618 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11619
11620 ins_cost(300);
11621 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11622 ins_encode %{
11623 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11624 %}
11625 ins_pipe(ialu_reg_reg_alu0);
11626 %}
11627
11628 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11629 %{
11630 match(Set dst (MulL src imm));
11631 effect(KILL cr);
11632
11633 ins_cost(300);
11634 format %{ "imulq $dst, $src, $imm\t# long" %}
11635 ins_encode %{
11636 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11637 %}
11638 ins_pipe(ialu_reg_reg_alu0);
11639 %}
11640
11641 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11642 %{
11643 predicate(!UseAPX);
11644 match(Set dst (MulL dst (LoadL src)));
11645 effect(KILL cr);
11646
11647 ins_cost(350);
11648 format %{ "imulq $dst, $src\t# long" %}
11649 ins_encode %{
11650 __ imulq($dst$$Register, $src$$Address);
11651 %}
11652 ins_pipe(ialu_reg_mem_alu0);
11653 %}
11654
11655 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11656 %{
11657 predicate(UseAPX);
11658 match(Set dst (MulL src1 (LoadL src2)));
11659 effect(KILL cr);
11660 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11661
11662 ins_cost(350);
11663 format %{ "eimulq $dst, $src1, $src2 \t# long" %}
11664 ins_encode %{
11665 __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11666 %}
11667 ins_pipe(ialu_reg_mem_alu0);
11668 %}
11669
11670 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11671 %{
11672 match(Set dst (MulL (LoadL src) imm));
11673 effect(KILL cr);
11674
11675 ins_cost(300);
11676 format %{ "imulq $dst, $src, $imm\t# long" %}
11677 ins_encode %{
11678 __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11679 %}
11680 ins_pipe(ialu_reg_mem_alu0);
11681 %}
11682
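// MulHiL/UMulHiL use the one-operand IMUL/MUL forms, which multiply RAX by the
// source and leave the 128-bit product in RDX:RAX; the high half in RDX is the
// node's result.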
11683 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11684 %{
11685 match(Set dst (MulHiL src rax));
11686 effect(USE_KILL rax, KILL cr);
11687
11688 ins_cost(300);
11689 format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
11690 ins_encode %{
11691 __ imulq($src$$Register);
11692 %}
11693 ins_pipe(ialu_reg_reg_alu0);
11694 %}
11695
11696 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11697 %{
11698 match(Set dst (UMulHiL src rax));
11699 effect(USE_KILL rax, KILL cr);
11700
11701 ins_cost(300);
11702 format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
11703 ins_encode %{
11704 __ mulq($src$$Register);
11705 %}
11706 ins_pipe(ialu_reg_reg_alu0);
11707 %}
11708
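// The explicit MIN_VALUE / -1 check in the div/mod encodings avoids the #DE
// fault IDIV raises when the quotient overflows: Java defines that case as
// quotient MIN_VALUE and remainder 0, which the fast path produces directly
// (rdx is zeroed and rax keeps MIN_VALUE) without executing IDIV.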
11709 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11710 rFlagsReg cr)
11711 %{
11712 match(Set rax (DivI rax div));
11713 effect(KILL rdx, KILL cr);
11714
11715 ins_cost(30*100+10*100); // XXX
11716 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11717 "jne,s normal\n\t"
11718 "xorl rdx, rdx\n\t"
11719 "cmpl $div, -1\n\t"
11720 "je,s done\n"
11721 "normal: cdql\n\t"
11722 "idivl $div\n"
11723 "done:" %}
11724 ins_encode(cdql_enc(div));
11725 ins_pipe(ialu_reg_reg_alu0);
11726 %}
11727
11728 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11729 rFlagsReg cr)
11730 %{
11731 match(Set rax (DivL rax div));
11732 effect(KILL rdx, KILL cr);
11733
11734 ins_cost(30*100+10*100); // XXX
11735 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11736 "cmpq rax, rdx\n\t"
11737 "jne,s normal\n\t"
11738 "xorl rdx, rdx\n\t"
11739 "cmpq $div, -1\n\t"
11740 "je,s done\n"
11741 "normal: cdqq\n\t"
11742 "idivq $div\n"
11743 "done:" %}
11744 ins_encode(cdqq_enc(div));
11745 ins_pipe(ialu_reg_reg_alu0);
11746 %}
11747
11748 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11749 %{
11750 match(Set rax (UDivI rax div));
11751 effect(KILL rdx, KILL cr);
11752
11753 ins_cost(300);
11754 format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11755 ins_encode %{
11756 __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11757 %}
11758 ins_pipe(ialu_reg_reg_alu0);
11759 %}
11760
11761 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11762 %{
11763 match(Set rax (UDivL rax div));
11764 effect(KILL rdx, KILL cr);
11765
11766 ins_cost(300);
11767 format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11768 ins_encode %{
11769 __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11770 %}
11771 ins_pipe(ialu_reg_reg_alu0);
11772 %}
11773
11774 // Integer DIVMOD with Register, both quotient and mod results
11775 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11776 rFlagsReg cr)
11777 %{
11778 match(DivModI rax div);
11779 effect(KILL cr);
11780
11781 ins_cost(30*100+10*100); // XXX
11782 format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
11783 "jne,s normal\n\t"
11784 "xorl rdx, rdx\n\t"
11785 "cmpl $div, -1\n\t"
11786 "je,s done\n"
11787 "normal: cdql\n\t"
11788 "idivl $div\n"
11789 "done:" %}
11790 ins_encode(cdql_enc(div));
11791 ins_pipe(pipe_slow);
11792 %}
11793
11794 // Long DIVMOD with Register, both quotient and mod results
11795 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11796 rFlagsReg cr)
11797 %{
11798 match(DivModL rax div);
11799 effect(KILL cr);
11800
11801 ins_cost(30*100+10*100); // XXX
11802 format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
11803 "cmpq rax, rdx\n\t"
11804 "jne,s normal\n\t"
11805 "xorl rdx, rdx\n\t"
11806 "cmpq $div, -1\n\t"
11807 "je,s done\n"
11808 "normal: cdqq\n\t"
11809 "idivq $div\n"
11810 "done:" %}
11811 ins_encode(cdqq_enc(div));
11812 ins_pipe(pipe_slow);
11813 %}
11814
11815 // Unsigned integer DIVMOD with Register, both quotient and mod results
11816 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11817 no_rax_rdx_RegI div, rFlagsReg cr)
11818 %{
11819 match(UDivModI rax div);
11820 effect(TEMP tmp, KILL cr);
11821
11822 ins_cost(300);
11823 format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11824 "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11825 %}
11826 ins_encode %{
11827 __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11828 %}
11829 ins_pipe(pipe_slow);
11830 %}
11831
11832 // Unsigned long DIVMOD with Register, both quotient and mod results
11833 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11834 no_rax_rdx_RegL div, rFlagsReg cr)
11835 %{
11836 match(UDivModL rax div);
11837 effect(TEMP tmp, KILL cr);
11838
11839 ins_cost(300);
11840 format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11841 "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11842 %}
11843 ins_encode %{
11844 __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11845 %}
11846 ins_pipe(pipe_slow);
11847 %}
11848
11849 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11850 rFlagsReg cr)
11851 %{
11852 match(Set rdx (ModI rax div));
11853 effect(KILL rax, KILL cr);
11854
11855 ins_cost(300); // XXX
11856 format %{ "cmpl rax, 0x80000000\t# irem\n\t"
11857 "jne,s normal\n\t"
11858 "xorl rdx, rdx\n\t"
11859 "cmpl $div, -1\n\t"
11860 "je,s done\n"
11861 "normal: cdql\n\t"
11862 "idivl $div\n"
11863 "done:" %}
11864 ins_encode(cdql_enc(div));
11865 ins_pipe(ialu_reg_reg_alu0);
11866 %}
11867
11868 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11869 rFlagsReg cr)
11870 %{
11871 match(Set rdx (ModL rax div));
11872 effect(KILL rax, KILL cr);
11873
11874 ins_cost(300); // XXX
11875 format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
11876 "cmpq rax, rdx\n\t"
11877 "jne,s normal\n\t"
11878 "xorl rdx, rdx\n\t"
11879 "cmpq $div, -1\n\t"
11880 "je,s done\n"
11881 "normal: cdqq\n\t"
11882 "idivq $div\n"
11883 "done:" %}
11884 ins_encode(cdqq_enc(div));
11885 ins_pipe(ialu_reg_reg_alu0);
11886 %}
11887
11888 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11889 %{
11890 match(Set rdx (UModI rax div));
11891 effect(KILL rax, KILL cr);
11892
11893 ins_cost(300);
11894 format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11895 ins_encode %{
11896 __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11897 %}
11898 ins_pipe(ialu_reg_reg_alu0);
11899 %}
11900
11901 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11902 %{
11903 match(Set rdx (UModL rax div));
11904 effect(KILL rax, KILL cr);
11905
11906 ins_cost(300);
11907 format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11908 ins_encode %{
11909 __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11910 %}
11911 ins_pipe(ialu_reg_reg_alu0);
11912 %}
11913
11914 // Integer Shift Instructions
11915 // Shift Left by one, two, three
11916 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11917 %{
11918 predicate(!UseAPX);
11919 match(Set dst (LShiftI dst shift));
11920 effect(KILL cr);
11921
11922 format %{ "sall $dst, $shift" %}
11923 ins_encode %{
11924 __ sall($dst$$Register, $shift$$constant);
11925 %}
11926 ins_pipe(ialu_reg);
11927 %}
11928
11929 // Shift Left by one, two, three
11930 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11931 %{
11932 predicate(UseAPX);
11933 match(Set dst (LShiftI src shift));
11934 effect(KILL cr);
11935 flag(PD::Flag_ndd_demotable_opr1);
11936
11937 format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
11938 ins_encode %{
11939 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11940 %}
11941 ins_pipe(ialu_reg);
11942 %}
11943
11944 // Shift Left by 8-bit immediate
11945 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11946 %{
11947 predicate(!UseAPX);
11948 match(Set dst (LShiftI dst shift));
11949 effect(KILL cr);
11950
11951 format %{ "sall $dst, $shift" %}
11952 ins_encode %{
11953 __ sall($dst$$Register, $shift$$constant);
11954 %}
11955 ins_pipe(ialu_reg);
11956 %}
11957
11958 // Shift Left by 8-bit immediate
11959 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11960 %{
11961 predicate(UseAPX);
11962 match(Set dst (LShiftI src shift));
11963 effect(KILL cr);
11964 flag(PD::Flag_ndd_demotable_opr1);
11965
11966 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11967 ins_encode %{
11968 __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11969 %}
11970 ins_pipe(ialu_reg);
11971 %}
11972
11973 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11974 %{
11975 predicate(UseAPX);
11976 match(Set dst (LShiftI (LoadI src) shift));
11977 effect(KILL cr);
11978
11979 format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
11980 ins_encode %{
11981 __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11982 %}
11983 ins_pipe(ialu_reg);
11984 %}
11985
11986 // Shift Left by 8-bit immediate
11987 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11988 %{
11989 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11990 effect(KILL cr);
11991
11992 format %{ "sall $dst, $shift" %}
11993 ins_encode %{
11994 __ sall($dst$$Address, $shift$$constant);
11995 %}
11996 ins_pipe(ialu_mem_imm);
11997 %}
11998
11999 // Shift Left by variable
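// Legacy variable-count shifts require the count in CL and clobber the flags;
// the BMI2 shlx/sarx/shrx forms take the count in any register and leave the
// flags untouched, so they carry no rFlagsReg effect.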
12000 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12001 %{
12002 predicate(!VM_Version::supports_bmi2());
12003 match(Set dst (LShiftI dst shift));
12004 effect(KILL cr);
12005
12006 format %{ "sall $dst, $shift" %}
12007 ins_encode %{
12008 __ sall($dst$$Register);
12009 %}
12010 ins_pipe(ialu_reg_reg);
12011 %}
12012
12013 // Shift Left by variable
12014 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12015 %{
12016 predicate(!VM_Version::supports_bmi2());
12017 match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12018 effect(KILL cr);
12019
12020 format %{ "sall $dst, $shift" %}
12021 ins_encode %{
12022 __ sall($dst$$Address);
12023 %}
12024 ins_pipe(ialu_mem_reg);
12025 %}
12026
12027 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12028 %{
12029 predicate(VM_Version::supports_bmi2());
12030 match(Set dst (LShiftI src shift));
12031
12032 format %{ "shlxl $dst, $src, $shift" %}
12033 ins_encode %{
12034 __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12035 %}
12036 ins_pipe(ialu_reg_reg);
12037 %}
12038
12039 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12040 %{
12041 predicate(VM_Version::supports_bmi2());
12042 match(Set dst (LShiftI (LoadI src) shift));
12043 ins_cost(175);
12044 format %{ "shlxl $dst, $src, $shift" %}
12045 ins_encode %{
12046 __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12047 %}
12048 ins_pipe(ialu_reg_mem);
12049 %}
12050
12051 // Arithmetic Shift Right by 8-bit immediate
12052 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12053 %{
12054 predicate(!UseAPX);
12055 match(Set dst (RShiftI dst shift));
12056 effect(KILL cr);
12057
12058 format %{ "sarl $dst, $shift" %}
12059 ins_encode %{
12060 __ sarl($dst$$Register, $shift$$constant);
12061 %}
12062 ins_pipe(ialu_mem_imm);
12063 %}
12064
12065 // Arithmetic Shift Right by 8-bit immediate
12066 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12067 %{
12068 predicate(UseAPX);
12069 match(Set dst (RShiftI src shift));
12070 effect(KILL cr);
12071 flag(PD::Flag_ndd_demotable_opr1);
12072
12073 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12074 ins_encode %{
12075 __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12076 %}
12077 ins_pipe(ialu_mem_imm);
12078 %}
12079
12080 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12081 %{
12082 predicate(UseAPX);
12083 match(Set dst (RShiftI (LoadI src) shift));
12084 effect(KILL cr);
12085
12086 format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
12087 ins_encode %{
12088 __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12089 %}
12090 ins_pipe(ialu_mem_imm);
12091 %}
12092
12093 // Arithmetic Shift Right by 8-bit immediate
12094 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12095 %{
12096 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12097 effect(KILL cr);
12098
12099 format %{ "sarl $dst, $shift" %}
12100 ins_encode %{
12101 __ sarl($dst$$Address, $shift$$constant);
12102 %}
12103 ins_pipe(ialu_mem_imm);
12104 %}
12105
12106 // Arithmetic Shift Right by variable
12107 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12108 %{
12109 predicate(!VM_Version::supports_bmi2());
12110 match(Set dst (RShiftI dst shift));
12111 effect(KILL cr);
12112
12113 format %{ "sarl $dst, $shift" %}
12114 ins_encode %{
12115 __ sarl($dst$$Register);
12116 %}
12117 ins_pipe(ialu_reg_reg);
12118 %}
12119
12120 // Arithmetic Shift Right by variable
12121 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12122 %{
12123 predicate(!VM_Version::supports_bmi2());
12124 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12125 effect(KILL cr);
12126
12127 format %{ "sarl $dst, $shift" %}
12128 ins_encode %{
12129 __ sarl($dst$$Address);
12130 %}
12131 ins_pipe(ialu_mem_reg);
12132 %}
12133
12134 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12135 %{
12136 predicate(VM_Version::supports_bmi2());
12137 match(Set dst (RShiftI src shift));
12138
12139 format %{ "sarxl $dst, $src, $shift" %}
12140 ins_encode %{
12141 __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12142 %}
12143 ins_pipe(ialu_reg_reg);
12144 %}
12145
12146 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12147 %{
12148 predicate(VM_Version::supports_bmi2());
12149 match(Set dst (RShiftI (LoadI src) shift));
12150 ins_cost(175);
12151 format %{ "sarxl $dst, $src, $shift" %}
12152 ins_encode %{
12153 __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12154 %}
12155 ins_pipe(ialu_reg_mem);
12156 %}
12157
12158 // Logical Shift Right by 8-bit immediate
12159 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12160 %{
12161 predicate(!UseAPX);
12162 match(Set dst (URShiftI dst shift));
12163 effect(KILL cr);
12164
12165 format %{ "shrl $dst, $shift" %}
12166 ins_encode %{
12167 __ shrl($dst$$Register, $shift$$constant);
12168 %}
12169 ins_pipe(ialu_reg);
12170 %}
12171
12172 // Logical Shift Right by 8-bit immediate
12173 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12174 %{
12175 predicate(UseAPX);
12176 match(Set dst (URShiftI src shift));
12177 effect(KILL cr);
12178 flag(PD::Flag_ndd_demotable_opr1);
12179
12180 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12181 ins_encode %{
12182 __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12183 %}
12184 ins_pipe(ialu_reg);
12185 %}
12186
12187 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12188 %{
12189 predicate(UseAPX);
12190 match(Set dst (URShiftI (LoadI src) shift));
12191 effect(KILL cr);
12192
12193 format %{ "eshrl $dst, $src, $shift\t# int (ndd)" %}
12194 ins_encode %{
12195 __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12196 %}
12197 ins_pipe(ialu_reg);
12198 %}
12199
12200 // Logical Shift Right by 8-bit immediate
12201 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12202 %{
12203 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12204 effect(KILL cr);
12205
12206 format %{ "shrl $dst, $shift" %}
12207 ins_encode %{
12208 __ shrl($dst$$Address, $shift$$constant);
12209 %}
12210 ins_pipe(ialu_mem_imm);
12211 %}
12212
12213 // Logical Shift Right by variable
12214 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12215 %{
12216 predicate(!VM_Version::supports_bmi2());
12217 match(Set dst (URShiftI dst shift));
12218 effect(KILL cr);
12219
12220 format %{ "shrl $dst, $shift" %}
12221 ins_encode %{
12222 __ shrl($dst$$Register);
12223 %}
12224 ins_pipe(ialu_reg_reg);
12225 %}
12226
12227 // Logical Shift Right by variable
12228 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12229 %{
12230 predicate(!VM_Version::supports_bmi2());
12231 match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12232 effect(KILL cr);
12233
12234 format %{ "shrl $dst, $shift" %}
12235 ins_encode %{
12236 __ shrl($dst$$Address);
12237 %}
12238 ins_pipe(ialu_mem_reg);
12239 %}
12240
12241 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12242 %{
12243 predicate(VM_Version::supports_bmi2());
12244 match(Set dst (URShiftI src shift));
12245
12246 format %{ "shrxl $dst, $src, $shift" %}
12247 ins_encode %{
12248 __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12249 %}
12250 ins_pipe(ialu_reg_reg);
12251 %}
12252
12253 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12254 %{
12255 predicate(VM_Version::supports_bmi2());
12256 match(Set dst (URShiftI (LoadI src) shift));
12257 ins_cost(175);
12258 format %{ "shrxl $dst, $src, $shift" %}
12259 ins_encode %{
12260 __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12261 %}
12262 ins_pipe(ialu_reg_mem);
12263 %}
12264
12265 // Long Shift Instructions
12266 // Shift Left by one, two, three
12267 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12268 %{
12269 predicate(!UseAPX);
12270 match(Set dst (LShiftL dst shift));
12271 effect(KILL cr);
12272
12273 format %{ "salq $dst, $shift" %}
12274 ins_encode %{
12275 __ salq($dst$$Register, $shift$$constant);
12276 %}
12277 ins_pipe(ialu_reg);
12278 %}
12279
12280 // Shift Left by one, two, three
12281 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12282 %{
12283 predicate(UseAPX);
12284 match(Set dst (LShiftL src shift));
12285 effect(KILL cr);
12286 flag(PD::Flag_ndd_demotable_opr1);
12287
12288 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12289 ins_encode %{
12290 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12291 %}
12292 ins_pipe(ialu_reg);
12293 %}
12294
12295 // Shift Left by 8-bit immediate
12296 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12297 %{
12298 predicate(!UseAPX);
12299 match(Set dst (LShiftL dst shift));
12300 effect(KILL cr);
12301
12302 format %{ "salq $dst, $shift" %}
12303 ins_encode %{
12304 __ salq($dst$$Register, $shift$$constant);
12305 %}
12306 ins_pipe(ialu_reg);
12307 %}
12308
12309 // Shift Left by 8-bit immediate
12310 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12311 %{
12312 predicate(UseAPX);
12313 match(Set dst (LShiftL src shift));
12314 effect(KILL cr);
12315 flag(PD::Flag_ndd_demotable_opr1);
12316
12317 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12318 ins_encode %{
12319 __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12320 %}
12321 ins_pipe(ialu_reg);
12322 %}
12323
12324 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12325 %{
12326 predicate(UseAPX);
12327 match(Set dst (LShiftL (LoadL src) shift));
12328 effect(KILL cr);
12329
12330 format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
12331 ins_encode %{
12332 __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12333 %}
12334 ins_pipe(ialu_reg);
12335 %}
12336
12337 // Shift Left by 8-bit immediate
12338 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12339 %{
12340 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12341 effect(KILL cr);
12342
12343 format %{ "salq $dst, $shift" %}
12344 ins_encode %{
12345 __ salq($dst$$Address, $shift$$constant);
12346 %}
12347 ins_pipe(ialu_mem_imm);
12348 %}
12349
12350 // Shift Left by variable
12351 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12352 %{
12353 predicate(!VM_Version::supports_bmi2());
12354 match(Set dst (LShiftL dst shift));
12355 effect(KILL cr);
12356
12357 format %{ "salq $dst, $shift" %}
12358 ins_encode %{
12359 __ salq($dst$$Register);
12360 %}
12361 ins_pipe(ialu_reg_reg);
12362 %}
12363
12364 // Shift Left by variable
12365 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12366 %{
12367 predicate(!VM_Version::supports_bmi2());
12368 match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12369 effect(KILL cr);
12370
12371 format %{ "salq $dst, $shift" %}
12372 ins_encode %{
12373 __ salq($dst$$Address);
12374 %}
12375 ins_pipe(ialu_mem_reg);
12376 %}
12377
12378 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12379 %{
12380 predicate(VM_Version::supports_bmi2());
12381 match(Set dst (LShiftL src shift));
12382
12383 format %{ "shlxq $dst, $src, $shift" %}
12384 ins_encode %{
12385 __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12386 %}
12387 ins_pipe(ialu_reg_reg);
12388 %}
12389
12390 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12391 %{
12392 predicate(VM_Version::supports_bmi2());
12393 match(Set dst (LShiftL (LoadL src) shift));
12394 ins_cost(175);
12395 format %{ "shlxq $dst, $src, $shift" %}
12396 ins_encode %{
12397 __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12398 %}
12399 ins_pipe(ialu_reg_mem);
12400 %}
12401
12402 // Arithmetic Shift Right by immediate (count is masked to 6 bits)
12403 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12404 %{
12405 predicate(!UseAPX);
12406 match(Set dst (RShiftL dst shift));
12407 effect(KILL cr);
12408
12409 format %{ "sarq $dst, $shift" %}
12410 ins_encode %{
12411 __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12412 %}
12413 ins_pipe(ialu_mem_imm);
12414 %}
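
// For reference: 64-bit shifts only use the low six bits of the count, both in the
// hardware and in the Java definition of long shifts, so masking the immediate with
// 0x3F above cannot change the result.  A minimal sketch, assuming two's-complement
// int64_t values:
//   int64_t sar64(int64_t x, int32_t count) { return x >> (count & 63); }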
12415
12416 // Arithmetic Shift Right by immediate (count is masked to 6 bits)
12417 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12418 %{
12419 predicate(UseAPX);
12420 match(Set dst (RShiftL src shift));
12421 effect(KILL cr);
12422 flag(PD::Flag_ndd_demotable_opr1);
12423
12424 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12425 ins_encode %{
12426 __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12427 %}
12428 ins_pipe(ialu_mem_imm);
12429 %}
12430
12431 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12432 %{
12433 predicate(UseAPX);
12434 match(Set dst (RShiftL (LoadL src) shift));
12435 effect(KILL cr);
12436
12437 format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
12438 ins_encode %{
12439 __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12440 %}
12441 ins_pipe(ialu_mem_imm);
12442 %}
12443
12444 // Arithmetic Shift Right by immediate (count is masked to 6 bits)
12445 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12446 %{
12447 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12448 effect(KILL cr);
12449
12450 format %{ "sarq $dst, $shift" %}
12451 ins_encode %{
12452 __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12453 %}
12454 ins_pipe(ialu_mem_imm);
12455 %}
12456
12457 // Arithmetic Shift Right by variable
12458 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12459 %{
12460 predicate(!VM_Version::supports_bmi2());
12461 match(Set dst (RShiftL dst shift));
12462 effect(KILL cr);
12463
12464 format %{ "sarq $dst, $shift" %}
12465 ins_encode %{
12466 __ sarq($dst$$Register);
12467 %}
12468 ins_pipe(ialu_reg_reg);
12469 %}
12470
12471 // Arithmetic Shift Right by variable
12472 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12473 %{
12474 predicate(!VM_Version::supports_bmi2());
12475 match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12476 effect(KILL cr);
12477
12478 format %{ "sarq $dst, $shift" %}
12479 ins_encode %{
12480 __ sarq($dst$$Address);
12481 %}
12482 ins_pipe(ialu_mem_reg);
12483 %}
12484
12485 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12486 %{
12487 predicate(VM_Version::supports_bmi2());
12488 match(Set dst (RShiftL src shift));
12489
12490 format %{ "sarxq $dst, $src, $shift" %}
12491 ins_encode %{
12492 __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12493 %}
12494 ins_pipe(ialu_reg_reg);
12495 %}
12496
12497 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12498 %{
12499 predicate(VM_Version::supports_bmi2());
12500 match(Set dst (RShiftL (LoadL src) shift));
12501 ins_cost(175);
12502 format %{ "sarxq $dst, $src, $shift" %}
12503 ins_encode %{
12504 __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12505 %}
12506 ins_pipe(ialu_reg_mem);
12507 %}
12508
12509 // Logical Shift Right by 8-bit immediate
12510 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12511 %{
12512 predicate(!UseAPX);
12513 match(Set dst (URShiftL dst shift));
12514 effect(KILL cr);
12515
12516 format %{ "shrq $dst, $shift" %}
12517 ins_encode %{
12518 __ shrq($dst$$Register, $shift$$constant);
12519 %}
12520 ins_pipe(ialu_reg);
12521 %}
12522
12523 // Logical Shift Right by 8-bit immediate
12524 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12525 %{
12526 predicate(UseAPX);
12527 match(Set dst (URShiftL src shift));
12528 effect(KILL cr);
12529 flag(PD::Flag_ndd_demotable_opr1);
12530
12531 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12532 ins_encode %{
12533 __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12534 %}
12535 ins_pipe(ialu_reg);
12536 %}
12537
12538 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12539 %{
12540 predicate(UseAPX);
12541 match(Set dst (URShiftL (LoadL src) shift));
12542 effect(KILL cr);
12543
12544 format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
12545 ins_encode %{
12546 __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12547 %}
12548 ins_pipe(ialu_reg);
12549 %}
12550
12551 // Logical Shift Right by 8-bit immediate
12552 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12553 %{
12554 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12555 effect(KILL cr);
12556
12557 format %{ "shrq $dst, $shift" %}
12558 ins_encode %{
12559 __ shrq($dst$$Address, $shift$$constant);
12560 %}
12561 ins_pipe(ialu_mem_imm);
12562 %}
12563
12564 // Logical Shift Right by variable
12565 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12566 %{
12567 predicate(!VM_Version::supports_bmi2());
12568 match(Set dst (URShiftL dst shift));
12569 effect(KILL cr);
12570
12571 format %{ "shrq $dst, $shift" %}
12572 ins_encode %{
12573 __ shrq($dst$$Register);
12574 %}
12575 ins_pipe(ialu_reg_reg);
12576 %}
12577
12578 // Logical Shift Right by variable
12579 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12580 %{
12581 predicate(!VM_Version::supports_bmi2());
12582 match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12583 effect(KILL cr);
12584
12585 format %{ "shrq $dst, $shift" %}
12586 ins_encode %{
12587 __ shrq($dst$$Address);
12588 %}
12589 ins_pipe(ialu_mem_reg);
12590 %}
12591
12592 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12593 %{
12594 predicate(VM_Version::supports_bmi2());
12595 match(Set dst (URShiftL src shift));
12596
12597 format %{ "shrxq $dst, $src, $shift" %}
12598 ins_encode %{
12599 __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12600 %}
12601 ins_pipe(ialu_reg_reg);
12602 %}
12603
12604 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12605 %{
12606 predicate(VM_Version::supports_bmi2());
12607 match(Set dst (URShiftL (LoadL src) shift));
12608 ins_cost(175);
12609 format %{ "shrxq $dst, $src, $shift" %}
12610 ins_encode %{
12611 __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12612 %}
12613 ins_pipe(ialu_reg_mem);
12614 %}
12615
12616 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12617 // This idiom is used by the compiler for the i2b bytecode.
12618 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12619 %{
12620 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12621
12622 format %{ "movsbl $dst, $src\t# i2b" %}
12623 ins_encode %{
12624 __ movsbl($dst$$Register, $src$$Register);
12625 %}
12626 ins_pipe(ialu_reg_reg);
12627 %}
12628
12629 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12630 // This idiom is used by the compiler for the i2s bytecode.
12631 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12632 %{
12633 match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12634
12635 format %{ "movswl $dst, $src\t# i2s" %}
12636 ins_encode %{
12637 __ movswl($dst$$Register, $src$$Register);
12638 %}
12639 ins_pipe(ialu_reg_reg);
12640 %}
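
// For reference: javac emits i2b/i2s for casts like (byte) x and (short) x, and the
// ideal graph represents them as the shift pairs matched above, so these two rules
// collapse the pair back into a single sign-extending move.  A minimal sketch of the
// equivalence, using fixed-width C++ types:
//   int32_t i2b(int32_t x) { return (int32_t)(int8_t) x; }   // what movsbl computes
//   int32_t i2s(int32_t x) { return (int32_t)(int16_t) x; }  // what movswl computes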
12641
12642 // ROL/ROR instructions
12643
12644 // Rotate left by constant.
12645 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12646 %{
12647 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12648 match(Set dst (RotateLeft dst shift));
12649 effect(KILL cr);
12650 format %{ "roll $dst, $shift" %}
12651 ins_encode %{
12652 __ roll($dst$$Register, $shift$$constant);
12653 %}
12654 ins_pipe(ialu_reg);
12655 %}
12656
12657 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12658 %{
12659 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12660 match(Set dst (RotateLeft src shift));
12661 format %{ "rolxl $dst, $src, $shift" %}
12662 ins_encode %{
12663 int shift = 32 - ($shift$$constant & 31);
12664 __ rorxl($dst$$Register, $src$$Register, shift);
12665 %}
12666 ins_pipe(ialu_reg_reg);
12667 %}
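
// For reference: rorx can only rotate right, so a constant rotate-left is emitted as
// a rotate-right by the complementary count, using the identity
//   rol32(x, s) == ror32(x, 32 - s)        for 0 < s < 32
// which is what the "32 - (shift & 31)" computation above relies on.  A minimal
// sketch, assuming uint32_t operands:
//   uint32_t ror32(uint32_t x, uint32_t s) { s &= 31; return s ? (x >> s) | (x << (32 - s)) : x; }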
12668
12669 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12670 %{
12671 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12672 match(Set dst (RotateLeft (LoadI src) shift));
12673 ins_cost(175);
12674 format %{ "rolxl $dst, $src, $shift" %}
12675 ins_encode %{
12676 int shift = 32 - ($shift$$constant & 31);
12677 __ rorxl($dst$$Register, $src$$Address, shift);
12678 %}
12679 ins_pipe(ialu_reg_mem);
12680 %}
12681
12682 // Rotate Left by variable
12683 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12684 %{
12685 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12686 match(Set dst (RotateLeft dst shift));
12687 effect(KILL cr);
12688 format %{ "roll $dst, $shift" %}
12689 ins_encode %{
12690 __ roll($dst$$Register);
12691 %}
12692 ins_pipe(ialu_reg_reg);
12693 %}
12694
12695 // Rotate Left by variable
12696 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12697 %{
12698 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12699 match(Set dst (RotateLeft src shift));
12700 effect(KILL cr);
12701 flag(PD::Flag_ndd_demotable_opr1);
12702
12703 format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
12704 ins_encode %{
12705 __ eroll($dst$$Register, $src$$Register, false);
12706 %}
12707 ins_pipe(ialu_reg_reg);
12708 %}
12709
12710 // Rotate Right by constant.
12711 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12712 %{
12713 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12714 match(Set dst (RotateRight dst shift));
12715 effect(KILL cr);
12716 format %{ "rorl $dst, $shift" %}
12717 ins_encode %{
12718 __ rorl($dst$$Register, $shift$$constant);
12719 %}
12720 ins_pipe(ialu_reg);
12721 %}
12722
12723 // Rotate Right by constant.
12724 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12725 %{
12726 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12727 match(Set dst (RotateRight src shift));
12728 format %{ "rorxl $dst, $src, $shift" %}
12729 ins_encode %{
12730 __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12731 %}
12732 ins_pipe(ialu_reg_reg);
12733 %}
12734
12735 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12736 %{
12737 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12738 match(Set dst (RotateRight (LoadI src) shift));
12739 ins_cost(175);
12740 format %{ "rorxl $dst, $src, $shift" %}
12741 ins_encode %{
12742 __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12743 %}
12744 ins_pipe(ialu_reg_mem);
12745 %}
12746
12747 // Rotate Right by variable
12748 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12749 %{
12750 predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12751 match(Set dst (RotateRight dst shift));
12752 effect(KILL cr);
12753 format %{ "rorl $dst, $shift" %}
12754 ins_encode %{
12755 __ rorl($dst$$Register);
12756 %}
12757 ins_pipe(ialu_reg_reg);
12758 %}
12759
12760 // Rotate Right by variable
12761 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12762 %{
12763 predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12764 match(Set dst (RotateRight src shift));
12765 effect(KILL cr);
12766 flag(PD::Flag_ndd_demotable_opr1);
12767
12768 format %{ "erorl $dst, $src, $shift\t# rotate right (int ndd)" %}
12769 ins_encode %{
12770 __ erorl($dst$$Register, $src$$Register, false);
12771 %}
12772 ins_pipe(ialu_reg_reg);
12773 %}
12774
12775 // Rotate Left by constant.
12776 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12777 %{
12778 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12779 match(Set dst (RotateLeft dst shift));
12780 effect(KILL cr);
12781 format %{ "rolq $dst, $shift" %}
12782 ins_encode %{
12783 __ rolq($dst$$Register, $shift$$constant);
12784 %}
12785 ins_pipe(ialu_reg);
12786 %}
12787
12788 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12789 %{
12790 predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12791 match(Set dst (RotateLeft src shift));
12792 format %{ "rolxq $dst, $src, $shift" %}
12793 ins_encode %{
12794 int shift = 64 - ($shift$$constant & 63);
12795 __ rorxq($dst$$Register, $src$$Register, shift);
12796 %}
12797 ins_pipe(ialu_reg_reg);
12798 %}
12799
12800 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12801 %{
12802 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12803 match(Set dst (RotateLeft (LoadL src) shift));
12804 ins_cost(175);
12805 format %{ "rolxq $dst, $src, $shift" %}
12806 ins_encode %{
12807 int shift = 64 - ($shift$$constant & 63);
12808 __ rorxq($dst$$Register, $src$$Address, shift);
12809 %}
12810 ins_pipe(ialu_reg_mem);
12811 %}
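
// For reference: the two long rules above use the same trick as the 32-bit case,
// with rol64(x, s) == ror64(x, 64 - s) for 0 < s < 64, hence "64 - (shift & 63)".
//   uint64_t ror64(uint64_t x, uint64_t s) { s &= 63; return s ? (x >> s) | (x << (64 - s)) : x; }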
12812
12813 // Rotate Left by variable
12814 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12815 %{
12816 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12817 match(Set dst (RotateLeft dst shift));
12818 effect(KILL cr);
12819
12820 format %{ "rolq $dst, $shift" %}
12821 ins_encode %{
12822 __ rolq($dst$$Register);
12823 %}
12824 ins_pipe(ialu_reg_reg);
12825 %}
12826
12827 // Rotate Left by variable
12828 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12829 %{
12830 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12831 match(Set dst (RotateLeft src shift));
12832 effect(KILL cr);
12833 flag(PD::Flag_ndd_demotable_opr1);
12834
12835 format %{ "erolq $dst, $src, $shift\t# rotate left (long ndd)" %}
12836 ins_encode %{
12837 __ erolq($dst$$Register, $src$$Register, false);
12838 %}
12839 ins_pipe(ialu_reg_reg);
12840 %}
12841
12842 // Rotate Right by constant.
12843 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12844 %{
12845 predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12846 match(Set dst (RotateRight dst shift));
12847 effect(KILL cr);
12848 format %{ "rorq $dst, $shift" %}
12849 ins_encode %{
12850 __ rorq($dst$$Register, $shift$$constant);
12851 %}
12852 ins_pipe(ialu_reg);
12853 %}
12854
12855 // Rotate Right by constant
12856 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12857 %{
12858 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12859 match(Set dst (RotateRight src shift));
12860 format %{ "rorxq $dst, $src, $shift" %}
12861 ins_encode %{
12862 __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12863 %}
12864 ins_pipe(ialu_reg_reg);
12865 %}
12866
12867 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12868 %{
12869 predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12870 match(Set dst (RotateRight (LoadL src) shift));
12871 ins_cost(175);
12872 format %{ "rorxq $dst, $src, $shift" %}
12873 ins_encode %{
12874 __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12875 %}
12876 ins_pipe(ialu_reg_mem);
12877 %}
12878
12879 // Rotate Right by variable
12880 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12881 %{
12882 predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12883 match(Set dst (RotateRight dst shift));
12884 effect(KILL cr);
12885 format %{ "rorq $dst, $shift" %}
12886 ins_encode %{
12887 __ rorq($dst$$Register);
12888 %}
12889 ins_pipe(ialu_reg_reg);
12890 %}
12891
12892 // Rotate Right by variable
12893 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12894 %{
12895 predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12896 match(Set dst (RotateRight src shift));
12897 effect(KILL cr);
12898 flag(PD::Flag_ndd_demotable_opr1);
12899
12900 format %{ "erorq $dst, $src, $shift\t# rotate right (long ndd)" %}
12901 ins_encode %{
12902 __ erorq($dst$$Register, $src$$Register, false);
12903 %}
12904 ins_pipe(ialu_reg_reg);
12905 %}
12906
12907 //----------------------------- CompressBits/ExpandBits ------------------------
12908
12909 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12910 predicate(n->bottom_type()->isa_long());
12911 match(Set dst (CompressBits src mask));
12912 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12913 ins_encode %{
12914 __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12915 %}
12916 ins_pipe( pipe_slow );
12917 %}
12918
12919 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12920 predicate(n->bottom_type()->isa_long());
12921 match(Set dst (ExpandBits src mask));
12922 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12923 ins_encode %{
12924 __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12925 %}
12926 ins_pipe( pipe_slow );
12927 %}
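
// For reference: pext packs the src bits selected by mask into the low bits of dst,
// and pdep scatters the low bits of src out to the bit positions selected by mask.
// A small worked example (values chosen purely for illustration):
//   pext(0b10110110, 0b01110100) == 0b00000111   // bits 2,4,5,6 of src, packed low
//   pdep(0b00000111, 0b01110100) == 0b00110100   // low bits of src, spread to bits 2,4,5,6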
12928
12929 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12930 predicate(n->bottom_type()->isa_long());
12931 match(Set dst (CompressBits src (LoadL mask)));
12932 format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
12933 ins_encode %{
12934 __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12935 %}
12936 ins_pipe( pipe_slow );
12937 %}
12938
12939 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12940 predicate(n->bottom_type()->isa_long());
12941 match(Set dst (ExpandBits src (LoadL mask)));
12942 format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
12943 ins_encode %{
12944 __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12945 %}
12946 ins_pipe( pipe_slow );
12947 %}
12948
12949
12950 // Logical Instructions
12951
12952 // Integer Logical Instructions
12953
12954 // And Instructions
12955 // And Register with Register
12956 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12957 %{
12958 predicate(!UseAPX);
12959 match(Set dst (AndI dst src));
12960 effect(KILL cr);
12961 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12962
12963 format %{ "andl $dst, $src\t# int" %}
12964 ins_encode %{
12965 __ andl($dst$$Register, $src$$Register);
12966 %}
12967 ins_pipe(ialu_reg_reg);
12968 %}
12969
12970 // And Register with Register using New Data Destination (NDD)
12971 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12972 %{
12973 predicate(UseAPX);
12974 match(Set dst (AndI src1 src2));
12975 effect(KILL cr);
12976 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12977
12978 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
12979 ins_encode %{
12980 __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12982 %}
12983 ins_pipe(ialu_reg_reg);
12984 %}
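
// For reference: under UseAPX these rules use the three-operand NDD encodings,
// computing roughly  dst = src1 & src2  without requiring dst to alias src1.  The
// Flag_ndd_demotable_* hints appear to let the backend fall back to the shorter
// legacy two-operand encoding when dst is allocated to the named source operand,
// and the trailing 'false' argument presumably selects the flag-updating variant,
// consistent with the KILL cr effect declared above.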
12985
12986 // And Register with Immediate 255
12987 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12988 %{
12989 match(Set dst (AndI src mask));
12990
12991 format %{ "movzbl $dst, $src\t# int & 0xFF" %}
12992 ins_encode %{
12993 __ movzbl($dst$$Register, $src$$Register);
12994 %}
12995 ins_pipe(ialu_reg);
12996 %}
12997
12998 // And Register with Immediate 255 and promote to long
12999 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13000 %{
13001 match(Set dst (ConvI2L (AndI src mask)));
13002
13003 format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
13004 ins_encode %{
13005 __ movzbl($dst$$Register, $src$$Register);
13006 %}
13007 ins_pipe(ialu_reg);
13008 %}
13009
13010 // And Register with Immediate 65535
13011 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13012 %{
13013 match(Set dst (AndI src mask));
13014
13015 format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
13016 ins_encode %{
13017 __ movzwl($dst$$Register, $src$$Register);
13018 %}
13019 ins_pipe(ialu_reg);
13020 %}
13021
13022 // And Register with Immediate 65535 and promote to long
13023 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13024 %{
13025 match(Set dst (ConvI2L (AndI src mask)));
13026
13027 format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
13028 ins_encode %{
13029 __ movzwl($dst$$Register, $src$$Register);
13030 %}
13031 ins_pipe(ialu_reg);
13032 %}
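
// For reference: AND with 0xFF or 0xFFFF is just a zero-extension, so the four rules
// above use movzbl/movzwl instead of andl.  Those moves leave the flags alone (hence
// no KILL cr) and also clear the upper 32 bits of the destination, which is why the
// ConvI2L forms need no extra instruction.  Sketch, assuming unsigned C++ types:
//   uint32_t lo8 (uint32_t x) { return x & 0xFF;   }  // movzbl
//   uint64_t lo16(uint32_t x) { return x & 0xFFFF; }  // movzwl; upper 32 bits already zero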
13033
13034 // Can skip int2long conversions after AND with small bitmask
13035 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13036 %{
13037 predicate(VM_Version::supports_bmi2());
13038 ins_cost(125);
13039 effect(TEMP tmp, KILL cr);
13040 match(Set dst (ConvI2L (AndI src mask)));
13041 format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
13042 ins_encode %{
13043 __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13044 __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13045 %}
13046 ins_pipe(ialu_reg_reg);
13047 %}
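
// For reference: bzhi keeps the low n bits of its source and clears the rest, so for
// a mask of the form 2^k - 1 the sequence above computes  dst = src & mask  zero-extended
// to 64 bits, with k = exact_log2(mask + 1) loaded into the temp register.
// Sketch, assuming a mask such as 0x3FFFF (k == 18):
//   uint64_t bzhi_like(uint32_t src, unsigned k) { return src & ((1ull << k) - 1); }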
13048
13049 // And Register with Immediate
13050 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13051 %{
13052 predicate(!UseAPX);
13053 match(Set dst (AndI dst src));
13054 effect(KILL cr);
13055 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13056
13057 format %{ "andl $dst, $src\t# int" %}
13058 ins_encode %{
13059 __ andl($dst$$Register, $src$$constant);
13060 %}
13061 ins_pipe(ialu_reg);
13062 %}
13063
13064 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13065 %{
13066 predicate(UseAPX);
13067 match(Set dst (AndI src1 src2));
13068 effect(KILL cr);
13069 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13070
13071 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13072 ins_encode %{
13073 __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13074 %}
13075 ins_pipe(ialu_reg);
13076 %}
13077
13078 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13079 %{
13080 predicate(UseAPX);
13081 match(Set dst (AndI (LoadI src1) src2));
13082 effect(KILL cr);
13083 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13084
13085 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13086 ins_encode %{
13087 __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13088 %}
13089 ins_pipe(ialu_reg);
13090 %}
13091
13092 // And Register with Memory
13093 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13094 %{
13095 predicate(!UseAPX);
13096 match(Set dst (AndI dst (LoadI src)));
13097 effect(KILL cr);
13098 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13099
13100 ins_cost(150);
13101 format %{ "andl $dst, $src\t# int" %}
13102 ins_encode %{
13103 __ andl($dst$$Register, $src$$Address);
13104 %}
13105 ins_pipe(ialu_reg_mem);
13106 %}
13107
13108 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13109 %{
13110 predicate(UseAPX);
13111 match(Set dst (AndI src1 (LoadI src2)));
13112 effect(KILL cr);
13113 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13114
13115 ins_cost(150);
13116 format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
13117 ins_encode %{
13118 __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13119 %}
13120 ins_pipe(ialu_reg_mem);
13121 %}
13122
13123 // And Memory with Register
13124 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13125 %{
13126 match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13127 effect(KILL cr);
13128 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13129
13130 ins_cost(150);
13131 format %{ "andb $dst, $src\t# byte" %}
13132 ins_encode %{
13133 __ andb($dst$$Address, $src$$Register);
13134 %}
13135 ins_pipe(ialu_mem_reg);
13136 %}
13137
13138 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13139 %{
13140 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13141 effect(KILL cr);
13142 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13143
13144 ins_cost(150);
13145 format %{ "andl $dst, $src\t# int" %}
13146 ins_encode %{
13147 __ andl($dst$$Address, $src$$Register);
13148 %}
13149 ins_pipe(ialu_mem_reg);
13150 %}
13151
13152 // And Memory with Immediate
13153 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13154 %{
13155 match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13156 effect(KILL cr);
13157 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13158
13159 ins_cost(125);
13160 format %{ "andl $dst, $src\t# int" %}
13161 ins_encode %{
13162 __ andl($dst$$Address, $src$$constant);
13163 %}
13164 ins_pipe(ialu_mem_imm);
13165 %}
13166
13167 // BMI1 instructions
13168 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
13169 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13170 predicate(UseBMI1Instructions);
13171 effect(KILL cr);
13172 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13173
13174 ins_cost(125);
13175 format %{ "andnl $dst, $src1, $src2" %}
13176
13177 ins_encode %{
13178 __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13179 %}
13180 ins_pipe(ialu_reg_mem);
13181 %}
13182
13183 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
13184 match(Set dst (AndI (XorI src1 minus_1) src2));
13185 predicate(UseBMI1Instructions);
13186 effect(KILL cr);
13187 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13188
13189 format %{ "andnl $dst, $src1, $src2" %}
13190
13191 ins_encode %{
13192 __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13193 %}
13194 ins_pipe(ialu_reg);
13195 %}
13196
13197 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
13198 match(Set dst (AndI (SubI imm_zero src) src));
13199 predicate(UseBMI1Instructions);
13200 effect(KILL cr);
13201 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13202
13203 format %{ "blsil $dst, $src" %}
13204
13205 ins_encode %{
13206 __ blsil($dst$$Register, $src$$Register);
13207 %}
13208 ins_pipe(ialu_reg);
13209 %}
13210
13211 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
13212 match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13213 predicate(UseBMI1Instructions);
13214 effect(KILL cr);
13215 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13216
13217 ins_cost(125);
13218 format %{ "blsil $dst, $src" %}
13219
13220 ins_encode %{
13221 __ blsil($dst$$Register, $src$$Address);
13222 %}
13223 ins_pipe(ialu_reg_mem);
13224 %}
13225
13226 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13227 %{
13228 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13229 predicate(UseBMI1Instructions);
13230 effect(KILL cr);
13231 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13232
13233 ins_cost(125);
13234 format %{ "blsmskl $dst, $src" %}
13235
13236 ins_encode %{
13237 __ blsmskl($dst$$Register, $src$$Address);
13238 %}
13239 ins_pipe(ialu_reg_mem);
13240 %}
13241
13242 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13243 %{
13244 match(Set dst (XorI (AddI src minus_1) src));
13245 predicate(UseBMI1Instructions);
13246 effect(KILL cr);
13247 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13248
13249 format %{ "blsmskl $dst, $src" %}
13250
13251 ins_encode %{
13252 __ blsmskl($dst$$Register, $src$$Register);
13253 %}
13254
13255 ins_pipe(ialu_reg);
13256 %}
13257
13258 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13259 %{
13260 match(Set dst (AndI (AddI src minus_1) src) );
13261 predicate(UseBMI1Instructions);
13262 effect(KILL cr);
13263 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13264
13265 format %{ "blsrl $dst, $src" %}
13266
13267 ins_encode %{
13268 __ blsrl($dst$$Register, $src$$Register);
13269 %}
13270
13271 ins_pipe(ialu_reg);
13272 %}
13273
13274 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13275 %{
13276 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13277 predicate(UseBMI1Instructions);
13278 effect(KILL cr);
13279 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13280
13281 ins_cost(125);
13282 format %{ "blsrl $dst, $src" %}
13283
13284 ins_encode %{
13285 __ blsrl($dst$$Register, $src$$Address);
13286 %}
13287
13288 ins_pipe(ialu_reg_mem);
13289 %}
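
// For reference: the BMI1 rules above recognize the classic bit-twiddling idioms and
// replace each with a single instruction.  With x = 0b10100 as a worked example:
//   andn(a, b) == ~a & b                     // matched from (a ^ -1) & b
//   blsi(x)    == x & -x       == 0b00100    // isolate lowest set bit
//   blsmsk(x)  == x ^ (x - 1)  == 0b00111    // mask up to and including lowest set bit
//   blsr(x)    == x & (x - 1)  == 0b10000    // clear lowest set bit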
13290
13291 // Or Instructions
13292 // Or Register with Register
13293 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13294 %{
13295 predicate(!UseAPX);
13296 match(Set dst (OrI dst src));
13297 effect(KILL cr);
13298 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13299
13300 format %{ "orl $dst, $src\t# int" %}
13301 ins_encode %{
13302 __ orl($dst$$Register, $src$$Register);
13303 %}
13304 ins_pipe(ialu_reg_reg);
13305 %}
13306
13307 // Or Register with Register using New Data Destination (NDD)
13308 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13309 %{
13310 predicate(UseAPX);
13311 match(Set dst (OrI src1 src2));
13312 effect(KILL cr);
13313 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13314
13315 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13316 ins_encode %{
13317 __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13318 %}
13319 ins_pipe(ialu_reg_reg);
13320 %}
13321
13322 // Or Register with Immediate
13323 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13324 %{
13325 predicate(!UseAPX);
13326 match(Set dst (OrI dst src));
13327 effect(KILL cr);
13328 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13329
13330 format %{ "orl $dst, $src\t# int" %}
13331 ins_encode %{
13332 __ orl($dst$$Register, $src$$constant);
13333 %}
13334 ins_pipe(ialu_reg);
13335 %}
13336
13337 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13338 %{
13339 predicate(UseAPX);
13340 match(Set dst (OrI src1 src2));
13341 effect(KILL cr);
13342 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13343
13344 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13345 ins_encode %{
13346 __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13347 %}
13348 ins_pipe(ialu_reg);
13349 %}
13350
13351 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13352 %{
13353 predicate(UseAPX);
13354 match(Set dst (OrI src1 src2));
13355 effect(KILL cr);
13356 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13357
13358 format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
13359 ins_encode %{
13360 __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13361 %}
13362 ins_pipe(ialu_reg);
13363 %}
13364
13365 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13366 %{
13367 predicate(UseAPX);
13368 match(Set dst (OrI (LoadI src1) src2));
13369 effect(KILL cr);
13370 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13371
13372 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13373 ins_encode %{
13374 __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13375 %}
13376 ins_pipe(ialu_reg);
13377 %}
13378
13379 // Or Register with Memory
13380 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13381 %{
13382 predicate(!UseAPX);
13383 match(Set dst (OrI dst (LoadI src)));
13384 effect(KILL cr);
13385 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13386
13387 ins_cost(150);
13388 format %{ "orl $dst, $src\t# int" %}
13389 ins_encode %{
13390 __ orl($dst$$Register, $src$$Address);
13391 %}
13392 ins_pipe(ialu_reg_mem);
13393 %}
13394
13395 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13396 %{
13397 predicate(UseAPX);
13398 match(Set dst (OrI src1 (LoadI src2)));
13399 effect(KILL cr);
13400 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13401
13402 ins_cost(150);
13403 format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
13404 ins_encode %{
13405 __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13406 %}
13407 ins_pipe(ialu_reg_mem);
13408 %}
13409
13410 // Or Memory with Register
13411 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13412 %{
13413 match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13414 effect(KILL cr);
13415 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13416
13417 ins_cost(150);
13418 format %{ "orb $dst, $src\t# byte" %}
13419 ins_encode %{
13420 __ orb($dst$$Address, $src$$Register);
13421 %}
13422 ins_pipe(ialu_mem_reg);
13423 %}
13424
13425 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13426 %{
13427 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13428 effect(KILL cr);
13429 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13430
13431 ins_cost(150);
13432 format %{ "orl $dst, $src\t# int" %}
13433 ins_encode %{
13434 __ orl($dst$$Address, $src$$Register);
13435 %}
13436 ins_pipe(ialu_mem_reg);
13437 %}
13438
13439 // Or Memory with Immediate
13440 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13441 %{
13442 match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13443 effect(KILL cr);
13444 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13445
13446 ins_cost(125);
13447 format %{ "orl $dst, $src\t# int" %}
13448 ins_encode %{
13449 __ orl($dst$$Address, $src$$constant);
13450 %}
13451 ins_pipe(ialu_mem_imm);
13452 %}
13453
13454 // Xor Instructions
13455 // Xor Register with Register
13456 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13457 %{
13458 predicate(!UseAPX);
13459 match(Set dst (XorI dst src));
13460 effect(KILL cr);
13461 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13462
13463 format %{ "xorl $dst, $src\t# int" %}
13464 ins_encode %{
13465 __ xorl($dst$$Register, $src$$Register);
13466 %}
13467 ins_pipe(ialu_reg_reg);
13468 %}
13469
13470 // Xor Register with Register using New Data Destination (NDD)
13471 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13472 %{
13473 predicate(UseAPX);
13474 match(Set dst (XorI src1 src2));
13475 effect(KILL cr);
13476 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13477
13478 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13479 ins_encode %{
13480 __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13481 %}
13482 ins_pipe(ialu_reg_reg);
13483 %}
13484
13485 // Xor Register with Immediate -1
13486 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13487 %{
13488 predicate(!UseAPX);
13489 match(Set dst (XorI dst imm));
13490
13491 format %{ "notl $dst" %}
13492 ins_encode %{
13493 __ notl($dst$$Register);
13494 %}
13495 ins_pipe(ialu_reg);
13496 %}
13497
13498 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13499 %{
13500 match(Set dst (XorI src imm));
13501 predicate(UseAPX);
13502 flag(PD::Flag_ndd_demotable_opr1);
13503
13504 format %{ "enotl $dst, $src" %}
13505 ins_encode %{
13506 __ enotl($dst$$Register, $src$$Register);
13507 %}
13508 ins_pipe(ialu_reg);
13509 %}
13510
13511 // Xor Register with Immediate
13512 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13513 %{
13514 // Strict predicate check so that xorI_rReg_im1 is always selected, regardless of cost, when immI src is -1.
13515 predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13516 match(Set dst (XorI dst src));
13517 effect(KILL cr);
13518 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13519
13520 format %{ "xorl $dst, $src\t# int" %}
13521 ins_encode %{
13522 __ xorl($dst$$Register, $src$$constant);
13523 %}
13524 ins_pipe(ialu_reg);
13525 %}
13526
13527 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13528 %{
13529 // Strict predicate check so that xorI_rReg_im1_ndd is always selected, regardless of cost, when immI src2 is -1.
13530 predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13531 match(Set dst (XorI src1 src2));
13532 effect(KILL cr);
13533 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13534
13535 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13536 ins_encode %{
13537 __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13538 %}
13539 ins_pipe(ialu_reg);
13540 %}
13541
13542 // Xor Memory with Immediate
13543 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13544 %{
13545 predicate(UseAPX);
13546 match(Set dst (XorI (LoadI src1) src2));
13547 effect(KILL cr);
13548 ins_cost(150);
13549 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13550
13551 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13552 ins_encode %{
13553 __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13554 %}
13555 ins_pipe(ialu_reg);
13556 %}
13557
13558 // Xor Register with Memory
13559 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13560 %{
13561 predicate(!UseAPX);
13562 match(Set dst (XorI dst (LoadI src)));
13563 effect(KILL cr);
13564 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13565
13566 ins_cost(150);
13567 format %{ "xorl $dst, $src\t# int" %}
13568 ins_encode %{
13569 __ xorl($dst$$Register, $src$$Address);
13570 %}
13571 ins_pipe(ialu_reg_mem);
13572 %}
13573
13574 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13575 %{
13576 predicate(UseAPX);
13577 match(Set dst (XorI src1 (LoadI src2)));
13578 effect(KILL cr);
13579 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13580
13581 ins_cost(150);
13582 format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
13583 ins_encode %{
13584 __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13585 %}
13586 ins_pipe(ialu_reg_mem);
13587 %}
13588
13589 // Xor Memory with Register
13590 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13591 %{
13592 match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13593 effect(KILL cr);
13594 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13595
13596 ins_cost(150);
13597 format %{ "xorb $dst, $src\t# byte" %}
13598 ins_encode %{
13599 __ xorb($dst$$Address, $src$$Register);
13600 %}
13601 ins_pipe(ialu_mem_reg);
13602 %}
13603
13604 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13605 %{
13606 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13607 effect(KILL cr);
13608 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13609
13610 ins_cost(150);
13611 format %{ "xorl $dst, $src\t# int" %}
13612 ins_encode %{
13613 __ xorl($dst$$Address, $src$$Register);
13614 %}
13615 ins_pipe(ialu_mem_reg);
13616 %}
13617
13618 // Xor Memory with Immediate
13619 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13620 %{
13621 match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13622 effect(KILL cr);
13623 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13624
13625 ins_cost(125);
13626 format %{ "xorl $dst, $src\t# int" %}
13627 ins_encode %{
13628 __ xorl($dst$$Address, $src$$constant);
13629 %}
13630 ins_pipe(ialu_mem_imm);
13631 %}
13632
13633
13634 // Long Logical Instructions
13635
13636 // And Instructions
13637 // And Register with Register
13638 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13639 %{
13640 predicate(!UseAPX);
13641 match(Set dst (AndL dst src));
13642 effect(KILL cr);
13643 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13644
13645 format %{ "andq $dst, $src\t# long" %}
13646 ins_encode %{
13647 __ andq($dst$$Register, $src$$Register);
13648 %}
13649 ins_pipe(ialu_reg_reg);
13650 %}
13651
13652 // And Register with Register using New Data Destination (NDD)
13653 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13654 %{
13655 predicate(UseAPX);
13656 match(Set dst (AndL src1 src2));
13657 effect(KILL cr);
13658 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13659
13660 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13661 ins_encode %{
13662 __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13664 %}
13665 ins_pipe(ialu_reg_reg);
13666 %}
13667
13668 // And Register with Immediate 255
13669 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13670 %{
13671 match(Set dst (AndL src mask));
13672
13673 format %{ "movzbl $dst, $src\t# long & 0xFF" %}
13674 ins_encode %{
13675 // movzbl zeroes out the upper 32 bits and does not need REX.W
13676 __ movzbl($dst$$Register, $src$$Register);
13677 %}
13678 ins_pipe(ialu_reg);
13679 %}
13680
13681 // And Register with Immediate 65535
13682 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13683 %{
13684 match(Set dst (AndL src mask));
13685
13686 format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
13687 ins_encode %{
13688 // movzwl zeroes out the upper 32 bits and does not need REX.W
13689 __ movzwl($dst$$Register, $src$$Register);
13690 %}
13691 ins_pipe(ialu_reg);
13692 %}
13693
13694 // And Register with Immediate
13695 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13696 %{
13697 predicate(!UseAPX);
13698 match(Set dst (AndL dst src));
13699 effect(KILL cr);
13700 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13701
13702 format %{ "andq $dst, $src\t# long" %}
13703 ins_encode %{
13704 __ andq($dst$$Register, $src$$constant);
13705 %}
13706 ins_pipe(ialu_reg);
13707 %}
13708
13709 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13710 %{
13711 predicate(UseAPX);
13712 match(Set dst (AndL src1 src2));
13713 effect(KILL cr);
13714 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13715
13716 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13717 ins_encode %{
13718 __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13719 %}
13720 ins_pipe(ialu_reg);
13721 %}
13722
13723 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13724 %{
13725 predicate(UseAPX);
13726 match(Set dst (AndL (LoadL src1) src2));
13727 effect(KILL cr);
13728 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13729
13730 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13731 ins_encode %{
13732 __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13733 %}
13734 ins_pipe(ialu_reg);
13735 %}
13736
13737 // And Register with Memory
13738 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13739 %{
13740 predicate(!UseAPX);
13741 match(Set dst (AndL dst (LoadL src)));
13742 effect(KILL cr);
13743 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13744
13745 ins_cost(150);
13746 format %{ "andq $dst, $src\t# long" %}
13747 ins_encode %{
13748 __ andq($dst$$Register, $src$$Address);
13749 %}
13750 ins_pipe(ialu_reg_mem);
13751 %}
13752
13753 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13754 %{
13755 predicate(UseAPX);
13756 match(Set dst (AndL src1 (LoadL src2)));
13757 effect(KILL cr);
13758 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13759
13760 ins_cost(150);
13761 format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
13762 ins_encode %{
13763 __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13764 %}
13765 ins_pipe(ialu_reg_mem);
13766 %}
13767
13768 // And Memory with Register
13769 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13770 %{
13771 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13772 effect(KILL cr);
13773 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13774
13775 ins_cost(150);
13776 format %{ "andq $dst, $src\t# long" %}
13777 ins_encode %{
13778 __ andq($dst$$Address, $src$$Register);
13779 %}
13780 ins_pipe(ialu_mem_reg);
13781 %}
13782
13783 // And Memory with Immediate
13784 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13785 %{
13786 match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13787 effect(KILL cr);
13788 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13789
13790 ins_cost(125);
13791 format %{ "andq $dst, $src\t# long" %}
13792 ins_encode %{
13793 __ andq($dst$$Address, $src$$constant);
13794 %}
13795 ins_pipe(ialu_mem_imm);
13796 %}
13797
13798 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13799 %{
13800 // con must be a pure 64-bit immediate whose complement, not(con), is a power of 2,
13801 // because for 8/32-bit masks a plain AND with a sign-extended imm32 already suffices.
13802 predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13803
13804 match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13805 effect(KILL cr);
13806
13807 ins_cost(125);
13808 format %{ "btrq $dst, log2(not($con))\t# long" %}
13809 ins_encode %{
13810 __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13811 %}
13812 ins_pipe(ialu_mem_imm);
13813 %}
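
// For illustration: clearing a high bit through memory, e.g. Java code of the form
// "field &= ~(1L << 40)", cannot use a sign-extended 32-bit AND immediate, so it
// matches btrL_mem_imm above and becomes a single "btrq [field], 40"; masks whose
// complement fits in 32 bits keep using the plain andq forms.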
13814
13815 // BMI1 instructions
13816 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
13817 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13818 predicate(UseBMI1Instructions);
13819 effect(KILL cr);
13820 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13821
13822 ins_cost(125);
13823 format %{ "andnq $dst, $src1, $src2" %}
13824
13825 ins_encode %{
13826 __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13827 %}
13828 ins_pipe(ialu_reg_mem);
13829 %}
13830
13831 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
13832 match(Set dst (AndL (XorL src1 minus_1) src2));
13833 predicate(UseBMI1Instructions);
13834 effect(KILL cr);
13835 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13836
13837 format %{ "andnq $dst, $src1, $src2" %}
13838
13839 ins_encode %{
13840 __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13841 %}
13842 ins_pipe(ialu_reg_mem);
13843 %}
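
// For illustration: with UseBMI1Instructions, a Java expression such as "(~a) & b"
// appears in the ideal graph as AndL(XorL(a, -1), b), so the two rules above fold
// the separate not + and into a single "andnq dst, a, b".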
13844
13845 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
13846 match(Set dst (AndL (SubL imm_zero src) src));
13847 predicate(UseBMI1Instructions);
13848 effect(KILL cr);
13849 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13850
13851 format %{ "blsiq $dst, $src" %}
13852
13853 ins_encode %{
13854 __ blsiq($dst$$Register, $src$$Register);
13855 %}
13856 ins_pipe(ialu_reg);
13857 %}
13858
13859 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
13860 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13861 predicate(UseBMI1Instructions);
13862 effect(KILL cr);
13863 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13864
13865 ins_cost(125);
13866 format %{ "blsiq $dst, $src" %}
13867
13868 ins_encode %{
13869 __ blsiq($dst$$Register, $src$$Address);
13870 %}
13871 ins_pipe(ialu_reg_mem);
13872 %}
13873
13874 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13875 %{
13876 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13877 predicate(UseBMI1Instructions);
13878 effect(KILL cr);
13879 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13880
13881 ins_cost(125);
13882 format %{ "blsmskq $dst, $src" %}
13883
13884 ins_encode %{
13885 __ blsmskq($dst$$Register, $src$$Address);
13886 %}
13887 ins_pipe(ialu_reg_mem);
13888 %}
13889
13890 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13891 %{
13892 match(Set dst (XorL (AddL src minus_1) src));
13893 predicate(UseBMI1Instructions);
13894 effect(KILL cr);
13895 flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13896
13897 format %{ "blsmskq $dst, $src" %}
13898
13899 ins_encode %{
13900 __ blsmskq($dst$$Register, $src$$Register);
13901 %}
13902
13903 ins_pipe(ialu_reg);
13904 %}
13905
13906 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13907 %{
13908 match(Set dst (AndL (AddL src minus_1) src) );
13909 predicate(UseBMI1Instructions);
13910 effect(KILL cr);
13911 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13912
13913 format %{ "blsrq $dst, $src" %}
13914
13915 ins_encode %{
13916 __ blsrq($dst$$Register, $src$$Register);
13917 %}
13918
13919 ins_pipe(ialu_reg);
13920 %}
13921
13922 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13923 %{
13924 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13925 predicate(UseBMI1Instructions);
13926 effect(KILL cr);
13927 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13928
13929 ins_cost(125);
13930 format %{ "blsrq $dst, $src" %}
13931
13932 ins_encode %{
13933 __ blsrq($dst$$Register, $src$$Address);
13934 %}
13935
13936 ins_pipe(ialu_reg);
13937 %}
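
// For illustration, the BMI1 rules above map the usual bit-twiddling idioms onto
// single instructions (register forms shown; the *_mem variants do the same with a
// loaded value):
//   (-x) & x      -> blsiq   dst, x   (isolate lowest set bit)
//   (x - 1) ^ x   -> blsmskq dst, x   (mask up to and including lowest set bit)
//   (x - 1) & x   -> blsrq   dst, x   (clear lowest set bit)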
13938
13939 // Or Instructions
13940 // Or Register with Register
13941 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13942 %{
13943 predicate(!UseAPX);
13944 match(Set dst (OrL dst src));
13945 effect(KILL cr);
13946 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13947
13948 format %{ "orq $dst, $src\t# long" %}
13949 ins_encode %{
13950 __ orq($dst$$Register, $src$$Register);
13951 %}
13952 ins_pipe(ialu_reg_reg);
13953 %}
13954
13955 // Or Register with Register using New Data Destination (NDD)
13956 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13957 %{
13958 predicate(UseAPX);
13959 match(Set dst (OrL src1 src2));
13960 effect(KILL cr);
13961 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13962
13963 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13964 ins_encode %{
13965 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13966
13967 %}
13968 ins_pipe(ialu_reg_reg);
13969 %}
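
// For illustration: without APX, a three-register "dst = src1 | src2" needs a copy
// first (movq dst, src1; orq dst, src2), while the NDD rule above emits a single
// "eorq dst, src1, src2". The Flag_ndd_demotable_opr1/opr2 hints mark source
// operands for which the instruction can be demoted back to the shorter legacy
// two-operand encoding when dst happens to coincide with that source.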
13970
13971 // Use any_RegP to match R15 (TLS register) without spilling.
13972 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13973 match(Set dst (OrL dst (CastP2X src)));
13974 effect(KILL cr);
13975 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13976
13977 format %{ "orq $dst, $src\t# long" %}
13978 ins_encode %{
13979 __ orq($dst$$Register, $src$$Register);
13980 %}
13981 ins_pipe(ialu_reg_reg);
13982 %}
13983
13984 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
13985 match(Set dst (OrL src1 (CastP2X src2)));
13986 effect(KILL cr);
13987 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13988
13989 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
13990 ins_encode %{
13991 __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13992 %}
13993 ins_pipe(ialu_reg_reg);
13994 %}
13995
13996 // Or Register with Immediate
13997 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13998 %{
13999 predicate(!UseAPX);
14000 match(Set dst (OrL dst src));
14001 effect(KILL cr);
14002 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14003
14004 format %{ "orq $dst, $src\t# long" %}
14005 ins_encode %{
14006 __ orq($dst$$Register, $src$$constant);
14007 %}
14008 ins_pipe(ialu_reg);
14009 %}
14010
14011 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14012 %{
14013 predicate(UseAPX);
14014 match(Set dst (OrL src1 src2));
14015 effect(KILL cr);
14016 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14017
14018 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14019 ins_encode %{
14020 __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14021 %}
14022 ins_pipe(ialu_reg);
14023 %}
14024
14025 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14026 %{
14027 predicate(UseAPX);
14028 match(Set dst (OrL src1 src2));
14029 effect(KILL cr);
14030 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14031
14032 format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
14033 ins_encode %{
14034 __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14035 %}
14036 ins_pipe(ialu_reg);
14037 %}
14038
14039 // Or Memory with Immediate into Register (NDD)
14040 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14041 %{
14042 predicate(UseAPX);
14043 match(Set dst (OrL (LoadL src1) src2));
14044 effect(KILL cr);
14045 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14046
14047 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14048 ins_encode %{
14049 __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14050 %}
14051 ins_pipe(ialu_reg);
14052 %}
14053
14054 // Or Register with Memory
14055 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14056 %{
14057 predicate(!UseAPX);
14058 match(Set dst (OrL dst (LoadL src)));
14059 effect(KILL cr);
14060 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14061
14062 ins_cost(150);
14063 format %{ "orq $dst, $src\t# long" %}
14064 ins_encode %{
14065 __ orq($dst$$Register, $src$$Address);
14066 %}
14067 ins_pipe(ialu_reg_mem);
14068 %}
14069
14070 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14071 %{
14072 predicate(UseAPX);
14073 match(Set dst (OrL src1 (LoadL src2)));
14074 effect(KILL cr);
14075 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14076
14077 ins_cost(150);
14078 format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
14079 ins_encode %{
14080 __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14081 %}
14082 ins_pipe(ialu_reg_mem);
14083 %}
14084
14085 // Or Memory with Register
14086 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14087 %{
14088 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14089 effect(KILL cr);
14090 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14091
14092 ins_cost(150);
14093 format %{ "orq $dst, $src\t# long" %}
14094 ins_encode %{
14095 __ orq($dst$$Address, $src$$Register);
14096 %}
14097 ins_pipe(ialu_mem_reg);
14098 %}
14099
14100 // Or Memory with Immediate
14101 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14102 %{
14103 match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14104 effect(KILL cr);
14105 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14106
14107 ins_cost(125);
14108 format %{ "orq $dst, $src\t# long" %}
14109 ins_encode %{
14110 __ orq($dst$$Address, $src$$constant);
14111 %}
14112 ins_pipe(ialu_mem_imm);
14113 %}
14114
14115 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14116 %{
14117 // con must be a pure 64-bit power-of-2 immediate,
14118 // because for 8/32-bit values a plain OR with a sign-extended imm32 already suffices.
14119 predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14120
14121 match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14122 effect(KILL cr);
14123
14124 ins_cost(125);
14125 format %{ "btsq $dst, log2($con)\t# long" %}
14126 ins_encode %{
14127 __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14128 %}
14129 ins_pipe(ialu_mem_imm);
14130 %}
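
// For illustration, mirroring btrL_mem_imm above: setting a bit at position 32 or
// higher through memory, e.g. "field |= (1L << 40)", cannot be expressed as a
// sign-extended 32-bit OR immediate, so it matches this rule and becomes a single
// "btsq [field], 40".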
14131
14132 // Xor Instructions
14133 // Xor Register with Register
14134 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14135 %{
14136 predicate(!UseAPX);
14137 match(Set dst (XorL dst src));
14138 effect(KILL cr);
14139 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14140
14141 format %{ "xorq $dst, $src\t# long" %}
14142 ins_encode %{
14143 __ xorq($dst$$Register, $src$$Register);
14144 %}
14145 ins_pipe(ialu_reg_reg);
14146 %}
14147
14148 // Xor Register with Register using New Data Destination (NDD)
14149 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14150 %{
14151 predicate(UseAPX);
14152 match(Set dst (XorL src1 src2));
14153 effect(KILL cr);
14154 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14155
14156 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14157 ins_encode %{
14158 __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14159 %}
14160 ins_pipe(ialu_reg_reg);
14161 %}
14162
14163 // Xor Register with Immediate -1
14164 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14165 %{
14166 predicate(!UseAPX);
14167 match(Set dst (XorL dst imm));
14168
14169 format %{ "notq $dst" %}
14170 ins_encode %{
14171 __ notq($dst$$Register);
14172 %}
14173 ins_pipe(ialu_reg);
14174 %}
14175
14176 instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm)
14177 %{
14178 predicate(UseAPX);
14179 match(Set dst (XorL src imm));
14180 flag(PD::Flag_ndd_demotable_opr1);
14181
14182 format %{ "enotq $dst, $src" %}
14183 ins_encode %{
14184 __ enotq($dst$$Register, $src$$Register);
14185 %}
14186 ins_pipe(ialu_reg);
14187 %}
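
// For illustration: a Java bitwise complement "~x" reaches the matcher as
// XorL(x, -1), so the two rules above emit a plain notq (or enotq with APX); since
// not does not update the flags, these rules neither KILL cr nor declare any flag
// bits.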
14188
14189 // Xor Register with Immediate
14190 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14191 %{
14192 // Exclude -1 here so that xorL_rReg_im1 (notq) is selected for that case regardless of cost.
14193 predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14194 match(Set dst (XorL dst src));
14195 effect(KILL cr);
14196 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14197
14198 format %{ "xorq $dst, $src\t# long" %}
14199 ins_encode %{
14200 __ xorq($dst$$Register, $src$$constant);
14201 %}
14202 ins_pipe(ialu_reg);
14203 %}
14204
14205 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14206 %{
14207 // Exclude -1 here so that xorL_rReg_im1_ndd (enotq) is selected for that case regardless of cost.
14208 predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14209 match(Set dst (XorL src1 src2));
14210 effect(KILL cr);
14211 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14212
14213 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14214 ins_encode %{
14215 __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14216 %}
14217 ins_pipe(ialu_reg);
14218 %}
14219
14220 // Xor Memory with Immediate into Register (NDD)
14221 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14222 %{
14223 predicate(UseAPX);
14224 match(Set dst (XorL (LoadL src1) src2));
14225 effect(KILL cr);
14226 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14227 ins_cost(150);
14228
14229 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14230 ins_encode %{
14231 __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14232 %}
14233 ins_pipe(ialu_reg);
14234 %}
14235
14236 // Xor Register with Memory
14237 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14238 %{
14239 predicate(!UseAPX);
14240 match(Set dst (XorL dst (LoadL src)));
14241 effect(KILL cr);
14242 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14243
14244 ins_cost(150);
14245 format %{ "xorq $dst, $src\t# long" %}
14246 ins_encode %{
14247 __ xorq($dst$$Register, $src$$Address);
14248 %}
14249 ins_pipe(ialu_reg_mem);
14250 %}
14251
14252 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14253 %{
14254 predicate(UseAPX);
14255 match(Set dst (XorL src1 (LoadL src2)));
14256 effect(KILL cr);
14257 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14258
14259 ins_cost(150);
14260 format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
14261 ins_encode %{
14262 __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14263 %}
14264 ins_pipe(ialu_reg_mem);
14265 %}
14266
14267 // Xor Memory with Register
14268 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14269 %{
14270 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14271 effect(KILL cr);
14272 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14273
14274 ins_cost(150);
14275 format %{ "xorq $dst, $src\t# long" %}
14276 ins_encode %{
14277 __ xorq($dst$$Address, $src$$Register);
14278 %}
14279 ins_pipe(ialu_mem_reg);
14280 %}
14281
14282 // Xor Memory with Immediate
14283 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14284 %{
14285 match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14286 effect(KILL cr);
14287 flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14288
14289 ins_cost(125);
14290 format %{ "xorq $dst, $src\t# long" %}
14291 ins_encode %{
14292 __ xorq($dst$$Address, $src$$constant);
14293 %}
14294 ins_pipe(ialu_mem_imm);
14295 %}
14296
14297 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14298 %{
14299 match(Set dst (CmpLTMask p q));
14300 effect(KILL cr);
14301
14302 ins_cost(400);
14303 format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
14304 "setcc $dst \t# emits setlt + movzbl or setzul for APX"
14305 "negl $dst" %}
14306 ins_encode %{
14307 __ cmpl($p$$Register, $q$$Register);
14308 __ setcc(Assembler::less, $dst$$Register);
14309 __ negl($dst$$Register);
14310 %}
14311 ins_pipe(pipe_slow);
14312 %}
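
// For illustration: CmpLTMask produces an all-ones/all-zero mask, dst = (p < q) ? -1 : 0.
// The sequence above materializes the condition as 0/1 with setcc and then negates
// it to spread that bit across the whole register.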
14313
14314 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14315 %{
14316 match(Set dst (CmpLTMask dst zero));
14317 effect(KILL cr);
14318
14319 ins_cost(100);
14320 format %{ "sarl $dst, #31\t# cmpLTMask0" %}
14321 ins_encode %{
14322 __ sarl($dst$$Register, 31);
14323 %}
14324 ins_pipe(ialu_reg);
14325 %}
14326
14327 /* Better to save a register than avoid a branch */
14328 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14329 %{
14330 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14331 effect(KILL cr);
14332 ins_cost(300);
14333 format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
14334 "jge done\n\t"
14335 "addl $p,$y\n"
14336 "done: " %}
14337 ins_encode %{
14338 Register Rp = $p$$Register;
14339 Register Rq = $q$$Register;
14340 Register Ry = $y$$Register;
14341 Label done;
14342 __ subl(Rp, Rq);
14343 __ jccb(Assembler::greaterEqual, done);
14344 __ addl(Rp, Ry);
14345 __ bind(done);
14346 %}
14347 ins_pipe(pipe_cmplt);
14348 %}
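
// For illustration: the pattern above computes p = (p - q) + ((p < q) ? y : 0),
// the classic "subtract, then add y back if the subtraction went negative"
// reduction (typically with y == q); the short forward branch over the add saves
// the extra register that materializing the -1/0 mask would need, as the comment
// above notes.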
14349
14350 /* Better to save a register than avoid a branch */
14351 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14352 %{
14353 match(Set y (AndI (CmpLTMask p q) y));
14354 effect(KILL cr);
14355
14356 ins_cost(300);
14357
14358 format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
14359 "jlt done\n\t"
14360 "xorl $y, $y\n"
14361 "done: " %}
14362 ins_encode %{
14363 Register Rp = $p$$Register;
14364 Register Rq = $q$$Register;
14365 Register Ry = $y$$Register;
14366 Label done;
14367 __ cmpl(Rp, Rq);
14368 __ jccb(Assembler::less, done);
14369 __ xorl(Ry, Ry);
14370 __ bind(done);
14371 %}
14372 ins_pipe(pipe_cmplt);
14373 %}
14374
14375
14376 //---------- FP Instructions------------------------------------------------
14377
14378 // Really expensive, avoid
14379 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14380 %{
14381 match(Set cr (CmpF src1 src2));
14382
14383 ins_cost(500);
14384 format %{ "ucomiss $src1, $src2\n\t"
14385 "jnp,s exit\n\t"
14386 "pushfq\t# saw NaN, set CF\n\t"
14387 "andq [rsp], #0xffffff2b\n\t"
14388 "popfq\n"
14389 "exit:" %}
14390 ins_encode %{
14391 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14392 emit_cmpfp_fixup(masm);
14393 %}
14394 ins_pipe(pipe_slow);
14395 %}
14396
14397 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
14398 match(Set cr (CmpF src1 src2));
14399
14400 ins_cost(100);
14401 format %{ "ucomiss $src1, $src2" %}
14402 ins_encode %{
14403 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14404 %}
14405 ins_pipe(pipe_slow);
14406 %}
14407
14408 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
14409 match(Set cr (CmpF src1 src2));
14410
14411 ins_cost(100);
14412 format %{ "vucomxss $src1, $src2" %}
14413 ins_encode %{
14414 __ vucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14415 %}
14416 ins_pipe(pipe_slow);
14417 %}
14418
14419 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
14420 match(Set cr (CmpF src1 (LoadF src2)));
14421
14422 ins_cost(100);
14423 format %{ "ucomiss $src1, $src2" %}
14424 ins_encode %{
14425 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14426 %}
14427 ins_pipe(pipe_slow);
14428 %}
14429
14430 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
14431 match(Set cr (CmpF src1 (LoadF src2)));
14432
14433 ins_cost(100);
14434 format %{ "vucomxss $src1, $src2" %}
14435 ins_encode %{
14436 __ vucomxss($src1$$XMMRegister, $src2$$Address);
14437 %}
14438 ins_pipe(pipe_slow);
14439 %}
14440
14441 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
14442 match(Set cr (CmpF src con));
14443
14444 ins_cost(100);
14445 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14446 ins_encode %{
14447 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14448 %}
14449 ins_pipe(pipe_slow);
14450 %}
14451
14452 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
14453 match(Set cr (CmpF src con));
14454
14455 ins_cost(100);
14456 format %{ "vucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14457 ins_encode %{
14458 __ vucomxss($src$$XMMRegister, $constantaddress($con));
14459 %}
14460 ins_pipe(pipe_slow);
14461 %}
14462
14463 // Really expensive, avoid
14464 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14465 %{
14466 match(Set cr (CmpD src1 src2));
14467
14468 ins_cost(500);
14469 format %{ "ucomisd $src1, $src2\n\t"
14470 "jnp,s exit\n\t"
14471 "pushfq\t# saw NaN, set CF\n\t"
14472 "andq [rsp], #0xffffff2b\n\t"
14473 "popfq\n"
14474 "exit:" %}
14475 ins_encode %{
14476 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14477 emit_cmpfp_fixup(masm);
14478 %}
14479 ins_pipe(pipe_slow);
14480 %}
14481
14482 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
14483 match(Set cr (CmpD src1 src2));
14484
14485 ins_cost(100);
14486 format %{ "ucomisd $src1, $src2 test" %}
14487 ins_encode %{
14488 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14489 %}
14490 ins_pipe(pipe_slow);
14491 %}
14492
14493 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
14494 match(Set cr (CmpD src1 src2));
14495
14496 ins_cost(100);
14497 format %{ "vucomxsd $src1, $src2 test" %}
14498 ins_encode %{
14499 __ vucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14500 %}
14501 ins_pipe(pipe_slow);
14502 %}
14503
14504 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
14505 match(Set cr (CmpD src1 (LoadD src2)));
14506
14507 ins_cost(100);
14508 format %{ "ucomisd $src1, $src2" %}
14509 ins_encode %{
14510 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14511 %}
14512 ins_pipe(pipe_slow);
14513 %}
14514
14515 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
14516 match(Set cr (CmpD src1 (LoadD src2)));
14517
14518 ins_cost(100);
14519 format %{ "vucomxsd $src1, $src2" %}
14520 ins_encode %{
14521 __ vucomxsd($src1$$XMMRegister, $src2$$Address);
14522 %}
14523 ins_pipe(pipe_slow);
14524 %}
14525
14526 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
14527 match(Set cr (CmpD src con));
14528 ins_cost(100);
14529 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14530 ins_encode %{
14531 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14532 %}
14533 ins_pipe(pipe_slow);
14534 %}
14535
14536 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
14537 match(Set cr (CmpD src con));
14538
14539 ins_cost(100);
14540 format %{ "vucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14541 ins_encode %{
14542 __ vucomxsd($src$$XMMRegister, $constantaddress($con));
14543 %}
14544 ins_pipe(pipe_slow);
14545 %}
14546
14547 // Compare into -1,0,1
14548 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14549 %{
14550 match(Set dst (CmpF3 src1 src2));
14551 effect(KILL cr);
14552
14553 ins_cost(275);
14554 format %{ "ucomiss $src1, $src2\n\t"
14555 "movl $dst, #-1\n\t"
14556 "jp,s done\n\t"
14557 "jb,s done\n\t"
14558 "setne $dst\n\t"
14559 "movzbl $dst, $dst\n"
14560 "done:" %}
14561 ins_encode %{
14562 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14563 emit_cmpfp3(masm, $dst$$Register);
14564 %}
14565 ins_pipe(pipe_slow);
14566 %}
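
// For illustration: the CmpF3/CmpD3 rules in this group yield the three-way result
// dst = -1, 0 or 1. The emitted sequence defaults dst to -1, keeps that value when
// the comparison is unordered (NaN) or below, and otherwise derives 0 or 1 from
// setne + movzbl.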
14567
14568 // Compare into -1,0,1
14569 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14570 %{
14571 match(Set dst (CmpF3 src1 (LoadF src2)));
14572 effect(KILL cr);
14573
14574 ins_cost(275);
14575 format %{ "ucomiss $src1, $src2\n\t"
14576 "movl $dst, #-1\n\t"
14577 "jp,s done\n\t"
14578 "jb,s done\n\t"
14579 "setne $dst\n\t"
14580 "movzbl $dst, $dst\n"
14581 "done:" %}
14582 ins_encode %{
14583 __ ucomiss($src1$$XMMRegister, $src2$$Address);
14584 emit_cmpfp3(masm, $dst$$Register);
14585 %}
14586 ins_pipe(pipe_slow);
14587 %}
14588
14589 // Compare into -1,0,1
14590 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14591 match(Set dst (CmpF3 src con));
14592 effect(KILL cr);
14593
14594 ins_cost(275);
14595 format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14596 "movl $dst, #-1\n\t"
14597 "jp,s done\n\t"
14598 "jb,s done\n\t"
14599 "setne $dst\n\t"
14600 "movzbl $dst, $dst\n"
14601 "done:" %}
14602 ins_encode %{
14603 __ ucomiss($src$$XMMRegister, $constantaddress($con));
14604 emit_cmpfp3(masm, $dst$$Register);
14605 %}
14606 ins_pipe(pipe_slow);
14607 %}
14608
14609 // Compare into -1,0,1
14610 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14611 %{
14612 match(Set dst (CmpD3 src1 src2));
14613 effect(KILL cr);
14614
14615 ins_cost(275);
14616 format %{ "ucomisd $src1, $src2\n\t"
14617 "movl $dst, #-1\n\t"
14618 "jp,s done\n\t"
14619 "jb,s done\n\t"
14620 "setne $dst\n\t"
14621 "movzbl $dst, $dst\n"
14622 "done:" %}
14623 ins_encode %{
14624 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14625 emit_cmpfp3(masm, $dst$$Register);
14626 %}
14627 ins_pipe(pipe_slow);
14628 %}
14629
14630 // Compare into -1,0,1
14631 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14632 %{
14633 match(Set dst (CmpD3 src1 (LoadD src2)));
14634 effect(KILL cr);
14635
14636 ins_cost(275);
14637 format %{ "ucomisd $src1, $src2\n\t"
14638 "movl $dst, #-1\n\t"
14639 "jp,s done\n\t"
14640 "jb,s done\n\t"
14641 "setne $dst\n\t"
14642 "movzbl $dst, $dst\n"
14643 "done:" %}
14644 ins_encode %{
14645 __ ucomisd($src1$$XMMRegister, $src2$$Address);
14646 emit_cmpfp3(masm, $dst$$Register);
14647 %}
14648 ins_pipe(pipe_slow);
14649 %}
14650
14651 // Compare into -1,0,1
14652 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14653 match(Set dst (CmpD3 src con));
14654 effect(KILL cr);
14655
14656 ins_cost(275);
14657 format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14658 "movl $dst, #-1\n\t"
14659 "jp,s done\n\t"
14660 "jb,s done\n\t"
14661 "setne $dst\n\t"
14662 "movzbl $dst, $dst\n"
14663 "done:" %}
14664 ins_encode %{
14665 __ ucomisd($src$$XMMRegister, $constantaddress($con));
14666 emit_cmpfp3(masm, $dst$$Register);
14667 %}
14668 ins_pipe(pipe_slow);
14669 %}
14670
14671 //----------Arithmetic Conversion Instructions---------------------------------
14672
14673 instruct convF2D_reg_reg(regD dst, regF src)
14674 %{
14675 match(Set dst (ConvF2D src));
14676
14677 format %{ "cvtss2sd $dst, $src" %}
14678 ins_encode %{
14679 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14680 %}
14681 ins_pipe(pipe_slow); // XXX
14682 %}
14683
14684 instruct convF2D_reg_mem(regD dst, memory src)
14685 %{
14686 predicate(UseAVX == 0);
14687 match(Set dst (ConvF2D (LoadF src)));
14688
14689 format %{ "cvtss2sd $dst, $src" %}
14690 ins_encode %{
14691 __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14692 %}
14693 ins_pipe(pipe_slow); // XXX
14694 %}
14695
14696 instruct convD2F_reg_reg(regF dst, regD src)
14697 %{
14698 match(Set dst (ConvD2F src));
14699
14700 format %{ "cvtsd2ss $dst, $src" %}
14701 ins_encode %{
14702 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14703 %}
14704 ins_pipe(pipe_slow); // XXX
14705 %}
14706
14707 instruct convD2F_reg_mem(regF dst, memory src)
14708 %{
14709 predicate(UseAVX == 0);
14710 match(Set dst (ConvD2F (LoadD src)));
14711
14712 format %{ "cvtsd2ss $dst, $src" %}
14713 ins_encode %{
14714 __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
14715 %}
14716 ins_pipe(pipe_slow); // XXX
14717 %}
14718
14719 // XXX do mem variants
14720 instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
14721 %{
14722 predicate(!VM_Version::supports_avx10_2());
14723 match(Set dst (ConvF2I src));
14724 effect(KILL cr);
14725 format %{ "convert_f2i $dst, $src" %}
14726 ins_encode %{
14727 __ convertF2I(T_INT, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14728 %}
14729 ins_pipe(pipe_slow);
14730 %}
14731
14732 instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src)
14733 %{
14734 predicate(VM_Version::supports_avx10_2());
14735 match(Set dst (ConvF2I src));
14736 format %{ "evcvttss2sisl $dst, $src" %}
14737 ins_encode %{
14738 __ evcvttss2sisl($dst$$Register, $src$$XMMRegister);
14739 %}
14740 ins_pipe(pipe_slow);
14741 %}
14742
14743 instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src)
14744 %{
14745 predicate(VM_Version::supports_avx10_2());
14746 match(Set dst (ConvF2I (LoadF src)));
14747 format %{ "evcvttss2sisl $dst, $src" %}
14748 ins_encode %{
14749 __ evcvttss2sisl($dst$$Register, $src$$Address);
14750 %}
14751 ins_pipe(pipe_slow);
14752 %}
14753
14754 instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
14755 %{
14756 predicate(!VM_Version::supports_avx10_2());
14757 match(Set dst (ConvF2L src));
14758 effect(KILL cr);
14759 format %{ "convert_f2l $dst, $src"%}
14760 ins_encode %{
14761 __ convertF2I(T_LONG, T_FLOAT, $dst$$Register, $src$$XMMRegister);
14762 %}
14763 ins_pipe(pipe_slow);
14764 %}
14765
14766 instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src)
14767 %{
14768 predicate(VM_Version::supports_avx10_2());
14769 match(Set dst (ConvF2L src));
14770 format %{ "evcvttss2sisq $dst, $src" %}
14771 ins_encode %{
14772 __ evcvttss2sisq($dst$$Register, $src$$XMMRegister);
14773 %}
14774 ins_pipe(pipe_slow);
14775 %}
14776
14777 instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src)
14778 %{
14779 predicate(VM_Version::supports_avx10_2());
14780 match(Set dst (ConvF2L (LoadF src)));
14781 format %{ "evcvttss2sisq $dst, $src" %}
14782 ins_encode %{
14783 __ evcvttss2sisq($dst$$Register, $src$$Address);
14784 %}
14785 ins_pipe(pipe_slow);
14786 %}
14787
14788 instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
14789 %{
14790 predicate(!VM_Version::supports_avx10_2());
14791 match(Set dst (ConvD2I src));
14792 effect(KILL cr);
14793 format %{ "convert_d2i $dst, $src"%}
14794 ins_encode %{
14795 __ convertF2I(T_INT, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14796 %}
14797 ins_pipe(pipe_slow);
14798 %}
14799
14800 instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src)
14801 %{
14802 predicate(VM_Version::supports_avx10_2());
14803 match(Set dst (ConvD2I src));
14804 format %{ "evcvttsd2sisl $dst, $src" %}
14805 ins_encode %{
14806 __ evcvttsd2sisl($dst$$Register, $src$$XMMRegister);
14807 %}
14808 ins_pipe(pipe_slow);
14809 %}
14810
14811 instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src)
14812 %{
14813 predicate(VM_Version::supports_avx10_2());
14814 match(Set dst (ConvD2I (LoadD src)));
14815 format %{ "evcvttsd2sisl $dst, $src" %}
14816 ins_encode %{
14817 __ evcvttsd2sisl($dst$$Register, $src$$Address);
14818 %}
14819 ins_pipe(pipe_slow);
14820 %}
14821
14822 instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
14823 %{
14824 predicate(!VM_Version::supports_avx10_2());
14825 match(Set dst (ConvD2L src));
14826 effect(KILL cr);
14827 format %{ "convert_d2l $dst, $src"%}
14828 ins_encode %{
14829 __ convertF2I(T_LONG, T_DOUBLE, $dst$$Register, $src$$XMMRegister);
14830 %}
14831 ins_pipe(pipe_slow);
14832 %}
14833
14834 instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src)
14835 %{
14836 predicate(VM_Version::supports_avx10_2());
14837 match(Set dst (ConvD2L src));
14838 format %{ "evcvttsd2sisq $dst, $src" %}
14839 ins_encode %{
14840 __ evcvttsd2sisq($dst$$Register, $src$$XMMRegister);
14841 %}
14842 ins_pipe(pipe_slow);
14843 %}
14844
14845 instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src)
14846 %{
14847 predicate(VM_Version::supports_avx10_2());
14848 match(Set dst (ConvD2L (LoadD src)));
14849 format %{ "evcvttsd2sisq $dst, $src" %}
14850 ins_encode %{
14851 __ evcvttsd2sisq($dst$$Register, $src$$Address);
14852 %}
14853 ins_pipe(pipe_slow);
14854 %}
14855
14856 instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14857 %{
14858 match(Set dst (RoundD src));
14859 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14860 format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
14861 ins_encode %{
14862 __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14863 %}
14864 ins_pipe(pipe_slow);
14865 %}
14866
14867 instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
14868 %{
14869 match(Set dst (RoundF src));
14870 effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
14871 format %{ "round_float $dst,$src" %}
14872 ins_encode %{
14873 __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
14874 %}
14875 ins_pipe(pipe_slow);
14876 %}
14877
14878 instruct convI2F_reg_reg(vlRegF dst, rRegI src)
14879 %{
14880 predicate(!UseXmmI2F);
14881 match(Set dst (ConvI2F src));
14882
14883 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14884 ins_encode %{
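// When AVX is available, $dst is zeroed first, most likely to break the false
// dependency that the partial-register write of cvtsi2ssl would otherwise carry
// on the register's previous contents (same idea in the i2d/l2f/l2d rules below).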
14885 if (UseAVX > 0) {
14886 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14887 }
14888 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
14889 %}
14890 ins_pipe(pipe_slow); // XXX
14891 %}
14892
14893 instruct convI2F_reg_mem(regF dst, memory src)
14894 %{
14895 predicate(UseAVX == 0);
14896 match(Set dst (ConvI2F (LoadI src)));
14897
14898 format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
14899 ins_encode %{
14900 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
14901 %}
14902 ins_pipe(pipe_slow); // XXX
14903 %}
14904
14905 instruct convI2D_reg_reg(vlRegD dst, rRegI src)
14906 %{
14907 predicate(!UseXmmI2D);
14908 match(Set dst (ConvI2D src));
14909
14910 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14911 ins_encode %{
14912 if (UseAVX > 0) {
14913 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14914 }
14915 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
14916 %}
14917 ins_pipe(pipe_slow); // XXX
14918 %}
14919
14920 instruct convI2D_reg_mem(regD dst, memory src)
14921 %{
14922 predicate(UseAVX == 0);
14923 match(Set dst (ConvI2D (LoadI src)));
14924
14925 format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
14926 ins_encode %{
14927 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
14928 %}
14929 ins_pipe(pipe_slow); // XXX
14930 %}
14931
14932 instruct convXI2F_reg(regF dst, rRegI src)
14933 %{
14934 predicate(UseXmmI2F);
14935 match(Set dst (ConvI2F src));
14936
14937 format %{ "movdl $dst, $src\n\t"
14938 "cvtdq2psl $dst, $dst\t# i2f" %}
14939 ins_encode %{
14940 __ movdl($dst$$XMMRegister, $src$$Register);
14941 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
14942 %}
14943 ins_pipe(pipe_slow); // XXX
14944 %}
14945
14946 instruct convXI2D_reg(regD dst, rRegI src)
14947 %{
14948 predicate(UseXmmI2D);
14949 match(Set dst (ConvI2D src));
14950
14951 format %{ "movdl $dst, $src\n\t"
14952 "cvtdq2pdl $dst, $dst\t# i2d" %}
14953 ins_encode %{
14954 __ movdl($dst$$XMMRegister, $src$$Register);
14955 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
14956 %}
14957 ins_pipe(pipe_slow); // XXX
14958 %}
14959
14960 instruct convL2F_reg_reg(vlRegF dst, rRegL src)
14961 %{
14962 match(Set dst (ConvL2F src));
14963
14964 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14965 ins_encode %{
14966 if (UseAVX > 0) {
14967 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14968 }
14969 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
14970 %}
14971 ins_pipe(pipe_slow); // XXX
14972 %}
14973
14974 instruct convL2F_reg_mem(regF dst, memory src)
14975 %{
14976 predicate(UseAVX == 0);
14977 match(Set dst (ConvL2F (LoadL src)));
14978
14979 format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
14980 ins_encode %{
14981 __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
14982 %}
14983 ins_pipe(pipe_slow); // XXX
14984 %}
14985
14986 instruct convL2D_reg_reg(vlRegD dst, rRegL src)
14987 %{
14988 match(Set dst (ConvL2D src));
14989
14990 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
14991 ins_encode %{
14992 if (UseAVX > 0) {
14993 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
14994 }
14995 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
14996 %}
14997 ins_pipe(pipe_slow); // XXX
14998 %}
14999
15000 instruct convL2D_reg_mem(regD dst, memory src)
15001 %{
15002 predicate(UseAVX == 0);
15003 match(Set dst (ConvL2D (LoadL src)));
15004
15005 format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
15006 ins_encode %{
15007 __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
15008 %}
15009 ins_pipe(pipe_slow); // XXX
15010 %}
15011
15012 instruct convI2L_reg_reg(rRegL dst, rRegI src)
15013 %{
15014 match(Set dst (ConvI2L src));
15015
15016 ins_cost(125);
15017 format %{ "movslq $dst, $src\t# i2l" %}
15018 ins_encode %{
15019 __ movslq($dst$$Register, $src$$Register);
15020 %}
15021 ins_pipe(ialu_reg_reg);
15022 %}
15023
15024 // Zero-extend convert int to long
15025 instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
15026 %{
15027 match(Set dst (AndL (ConvI2L src) mask));
15028
15029 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15030 ins_encode %{
15031 if ($dst$$reg != $src$$reg) {
15032 __ movl($dst$$Register, $src$$Register);
15033 }
15034 %}
15035 ins_pipe(ialu_reg_reg);
15036 %}
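
// For illustration: the 0xFFFFFFFF mask pattern comes from Java code such as
// "x & 0xFFFFFFFFL" or Integer.toUnsignedLong(i). On x86-64 a 32-bit movl already
// zeroes the upper half of its destination, so a single movl (or nothing at all
// when dst and src are the same register) is sufficient.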
15037
15038 // Zero-extend convert int to long
15039 instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
15040 %{
15041 match(Set dst (AndL (ConvI2L (LoadI src)) mask));
15042
15043 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
15044 ins_encode %{
15045 __ movl($dst$$Register, $src$$Address);
15046 %}
15047 ins_pipe(ialu_reg_mem);
15048 %}
15049
15050 instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
15051 %{
15052 match(Set dst (AndL src mask));
15053
15054 format %{ "movl $dst, $src\t# zero-extend long" %}
15055 ins_encode %{
15056 __ movl($dst$$Register, $src$$Register);
15057 %}
15058 ins_pipe(ialu_reg_reg);
15059 %}
15060
15061 instruct convL2I_reg_reg(rRegI dst, rRegL src)
15062 %{
15063 match(Set dst (ConvL2I src));
15064
15065 format %{ "movl $dst, $src\t# l2i" %}
15066 ins_encode %{
15067 __ movl($dst$$Register, $src$$Register);
15068 %}
15069 ins_pipe(ialu_reg_reg);
15070 %}
15071
15072
15073 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
15074 match(Set dst (MoveF2I src));
15075 effect(DEF dst, USE src);
15076
15077 ins_cost(125);
15078 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
15079 ins_encode %{
15080 __ movl($dst$$Register, Address(rsp, $src$$disp));
15081 %}
15082 ins_pipe(ialu_reg_mem);
15083 %}
15084
15085 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
15086 match(Set dst (MoveI2F src));
15087 effect(DEF dst, USE src);
15088
15089 ins_cost(125);
15090 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
15091 ins_encode %{
15092 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
15093 %}
15094 ins_pipe(pipe_slow);
15095 %}
15096
15097 instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
15098 match(Set dst (MoveD2L src));
15099 effect(DEF dst, USE src);
15100
15101 ins_cost(125);
15102 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
15103 ins_encode %{
15104 __ movq($dst$$Register, Address(rsp, $src$$disp));
15105 %}
15106 ins_pipe(ialu_reg_mem);
15107 %}
15108
15109 instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
15110 predicate(!UseXmmLoadAndClearUpper);
15111 match(Set dst (MoveL2D src));
15112 effect(DEF dst, USE src);
15113
15114 ins_cost(125);
15115 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
15116 ins_encode %{
15117 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15118 %}
15119 ins_pipe(pipe_slow);
15120 %}
15121
15122 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
15123 predicate(UseXmmLoadAndClearUpper);
15124 match(Set dst (MoveL2D src));
15125 effect(DEF dst, USE src);
15126
15127 ins_cost(125);
15128 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
15129 ins_encode %{
15130 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
15131 %}
15132 ins_pipe(pipe_slow);
15133 %}
15134
15135
15136 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
15137 match(Set dst (MoveF2I src));
15138 effect(DEF dst, USE src);
15139
15140 ins_cost(95); // XXX
15141 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
15142 ins_encode %{
15143 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
15144 %}
15145 ins_pipe(pipe_slow);
15146 %}
15147
15148 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
15149 match(Set dst (MoveI2F src));
15150 effect(DEF dst, USE src);
15151
15152 ins_cost(100);
15153 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
15154 ins_encode %{
15155 __ movl(Address(rsp, $dst$$disp), $src$$Register);
15156 %}
15157 ins_pipe( ialu_mem_reg );
15158 %}
15159
15160 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
15161 match(Set dst (MoveD2L src));
15162 effect(DEF dst, USE src);
15163
15164 ins_cost(95); // XXX
15165 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
15166 ins_encode %{
15167 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
15168 %}
15169 ins_pipe(pipe_slow);
15170 %}
15171
15172 instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
15173 match(Set dst (MoveL2D src));
15174 effect(DEF dst, USE src);
15175
15176 ins_cost(100);
15177 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
15178 ins_encode %{
15179 __ movq(Address(rsp, $dst$$disp), $src$$Register);
15180 %}
15181 ins_pipe(ialu_mem_reg);
15182 %}
15183
15184 instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
15185 match(Set dst (MoveF2I src));
15186 effect(DEF dst, USE src);
15187 ins_cost(85);
15188 format %{ "movd $dst,$src\t# MoveF2I" %}
15189 ins_encode %{
15190 __ movdl($dst$$Register, $src$$XMMRegister);
15191 %}
15192 ins_pipe( pipe_slow );
15193 %}
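
// For illustration: MoveF2I/MoveD2L are bit-reinterpretation moves (as used by
// Float.floatToRawIntBits and Double.doubleToRawLongBits), so the register forms
// here and below are plain movd/movq transfers between the XMM and general-purpose
// register files with no value conversion.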
15194
15195 instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
15196 match(Set dst (MoveD2L src));
15197 effect(DEF dst, USE src);
15198 ins_cost(85);
15199 format %{ "movd $dst,$src\t# MoveD2L" %}
15200 ins_encode %{
15201 __ movdq($dst$$Register, $src$$XMMRegister);
15202 %}
15203 ins_pipe( pipe_slow );
15204 %}
15205
15206 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
15207 match(Set dst (MoveI2F src));
15208 effect(DEF dst, USE src);
15209 ins_cost(100);
15210 format %{ "movd $dst,$src\t# MoveI2F" %}
15211 ins_encode %{
15212 __ movdl($dst$$XMMRegister, $src$$Register);
15213 %}
15214 ins_pipe( pipe_slow );
15215 %}
15216
15217 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
15218 match(Set dst (MoveL2D src));
15219 effect(DEF dst, USE src);
15220 ins_cost(100);
15221 format %{ "movd $dst,$src\t# MoveL2D" %}
15222 ins_encode %{
15223 __ movdq($dst$$XMMRegister, $src$$Register);
15224 %}
15225 ins_pipe( pipe_slow );
15226 %}
15227
15228 // Fast clearing of an array
15229 // Small non-constant length ClearArray for non-AVX512 targets.
15230 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15231 Universe dummy, rFlagsReg cr)
15232 %{
15233 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
15234 match(Set dummy (ClearArray cnt base));
15235 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15236
15237 format %{ $$template
15238 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15239 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15240 $$emit$$"jg LARGE\n\t"
15241 $$emit$$"dec rcx\n\t"
15242 $$emit$$"js DONE\t# Zero length\n\t"
15243 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15244 $$emit$$"dec rcx\n\t"
15245 $$emit$$"jge LOOP\n\t"
15246 $$emit$$"jmp DONE\n\t"
15247 $$emit$$"# LARGE:\n\t"
15248 if (UseFastStosb) {
15249 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15250 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15251 } else if (UseXMMForObjInit) {
15252 $$emit$$"mov rdi,rax\n\t"
15253 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15254 $$emit$$"jmpq L_zero_64_bytes\n\t"
15255 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15256 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15257 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15258 $$emit$$"add 0x40,rax\n\t"
15259 $$emit$$"# L_zero_64_bytes:\n\t"
15260 $$emit$$"sub 0x8,rcx\n\t"
15261 $$emit$$"jge L_loop\n\t"
15262 $$emit$$"add 0x4,rcx\n\t"
15263 $$emit$$"jl L_tail\n\t"
15264 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15265 $$emit$$"add 0x20,rax\n\t"
15266 $$emit$$"sub 0x4,rcx\n\t"
15267 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15268 $$emit$$"add 0x4,rcx\n\t"
15269 $$emit$$"jle L_end\n\t"
15270 $$emit$$"dec rcx\n\t"
15271 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15272 $$emit$$"vmovq xmm0,(rax)\n\t"
15273 $$emit$$"add 0x8,rax\n\t"
15274 $$emit$$"dec rcx\n\t"
15275 $$emit$$"jge L_sloop\n\t"
15276 $$emit$$"# L_end:\n\t"
15277 } else {
15278 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15279 }
15280 $$emit$$"# DONE"
15281 %}
15282 ins_encode %{
15283 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15284 $tmp$$XMMRegister, false, knoreg);
15285 %}
15286 ins_pipe(pipe_slow);
15287 %}
15288
15289 // Small non-constant length ClearArray for AVX512 targets.
15290 instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15291 Universe dummy, rFlagsReg cr)
15292 %{
15293 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
15294 match(Set dummy (ClearArray cnt base));
15295 ins_cost(125);
15296 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15297
15298 format %{ $$template
15299 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15300 $$emit$$"cmp InitArrayShortSize,rcx\n\t"
15301 $$emit$$"jg LARGE\n\t"
15302 $$emit$$"dec rcx\n\t"
15303 $$emit$$"js DONE\t# Zero length\n\t"
15304 $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
15305 $$emit$$"dec rcx\n\t"
15306 $$emit$$"jge LOOP\n\t"
15307 $$emit$$"jmp DONE\n\t"
15308 $$emit$$"# LARGE:\n\t"
15309 if (UseFastStosb) {
15310 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15311 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
15312 } else if (UseXMMForObjInit) {
15313 $$emit$$"mov rdi,rax\n\t"
15314 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15315 $$emit$$"jmpq L_zero_64_bytes\n\t"
15316 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15317 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15318 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15319 $$emit$$"add 0x40,rax\n\t"
15320 $$emit$$"# L_zero_64_bytes:\n\t"
15321 $$emit$$"sub 0x8,rcx\n\t"
15322 $$emit$$"jge L_loop\n\t"
15323 $$emit$$"add 0x4,rcx\n\t"
15324 $$emit$$"jl L_tail\n\t"
15325 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15326 $$emit$$"add 0x20,rax\n\t"
15327 $$emit$$"sub 0x4,rcx\n\t"
15328 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15329 $$emit$$"add 0x4,rcx\n\t"
15330 $$emit$$"jle L_end\n\t"
15331 $$emit$$"dec rcx\n\t"
15332 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15333 $$emit$$"vmovq xmm0,(rax)\n\t"
15334 $$emit$$"add 0x8,rax\n\t"
15335 $$emit$$"dec rcx\n\t"
15336 $$emit$$"jge L_sloop\n\t"
15337 $$emit$$"# L_end:\n\t"
15338 } else {
15339 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
15340 }
15341 $$emit$$"# DONE"
15342 %}
15343 ins_encode %{
15344 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15345 $tmp$$XMMRegister, false, $ktmp$$KRegister);
15346 %}
15347 ins_pipe(pipe_slow);
15348 %}
15349
15350 // Large non-constant length ClearArray for non-AVX512 targets.
15351 instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
15352 Universe dummy, rFlagsReg cr)
15353 %{
15354 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
15355 match(Set dummy (ClearArray cnt base));
15356 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
15357
15358 format %{ $$template
15359 if (UseFastStosb) {
15360 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15361 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15362 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15363 } else if (UseXMMForObjInit) {
15364 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15365 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15366 $$emit$$"jmpq L_zero_64_bytes\n\t"
15367 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15368 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15369 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15370 $$emit$$"add 0x40,rax\n\t"
15371 $$emit$$"# L_zero_64_bytes:\n\t"
15372 $$emit$$"sub 0x8,rcx\n\t"
15373 $$emit$$"jge L_loop\n\t"
15374 $$emit$$"add 0x4,rcx\n\t"
15375 $$emit$$"jl L_tail\n\t"
15376 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15377 $$emit$$"add 0x20,rax\n\t"
15378 $$emit$$"sub 0x4,rcx\n\t"
15379 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15380 $$emit$$"add 0x4,rcx\n\t"
15381 $$emit$$"jle L_end\n\t"
15382 $$emit$$"dec rcx\n\t"
15383 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15384 $$emit$$"vmovq xmm0,(rax)\n\t"
15385 $$emit$$"add 0x8,rax\n\t"
15386 $$emit$$"dec rcx\n\t"
15387 $$emit$$"jge L_sloop\n\t"
15388 $$emit$$"# L_end:\n\t"
15389 } else {
15390 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15391 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15392 }
15393 %}
15394 ins_encode %{
15395 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15396 $tmp$$XMMRegister, true, knoreg);
15397 %}
15398 ins_pipe(pipe_slow);
15399 %}
15400
15401 // Large non-constant length ClearArray for AVX512 targets.
15402 instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
15403 Universe dummy, rFlagsReg cr)
15404 %{
15405 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
15406 match(Set dummy (ClearArray cnt base));
15407 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
15408
15409 format %{ $$template
15410 if (UseFastStosb) {
15411 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15412 $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
15413 $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
15414 } else if (UseXMMForObjInit) {
15415 $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
15416 $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
15417 $$emit$$"jmpq L_zero_64_bytes\n\t"
15418 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
15419 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15420 $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
15421 $$emit$$"add 0x40,rax\n\t"
15422 $$emit$$"# L_zero_64_bytes:\n\t"
15423 $$emit$$"sub 0x8,rcx\n\t"
15424 $$emit$$"jge L_loop\n\t"
15425 $$emit$$"add 0x4,rcx\n\t"
15426 $$emit$$"jl L_tail\n\t"
15427 $$emit$$"vmovdqu ymm0,(rax)\n\t"
15428 $$emit$$"add 0x20,rax\n\t"
15429 $$emit$$"sub 0x4,rcx\n\t"
15430 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
15431 $$emit$$"add 0x4,rcx\n\t"
15432 $$emit$$"jle L_end\n\t"
15433 $$emit$$"dec rcx\n\t"
15434 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
15435 $$emit$$"vmovq xmm0,(rax)\n\t"
15436 $$emit$$"add 0x8,rax\n\t"
15437 $$emit$$"dec rcx\n\t"
15438 $$emit$$"jge L_sloop\n\t"
15439 $$emit$$"# L_end:\n\t"
15440 } else {
15441 $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
15442 $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
15443 }
15444 %}
15445 ins_encode %{
15446 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
15447 $tmp$$XMMRegister, true, $ktmp$$KRegister);
15448 %}
15449 ins_pipe(pipe_slow);
15450 %}
15451
15452 // Small constant length ClearArray for AVX512 targets.
15453 instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
15454 %{
15455 predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
15456 match(Set dummy (ClearArray cnt base));
15457 ins_cost(100);
15458 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
15459 format %{ "clear_mem_imm $base , $cnt \n\t" %}
15460 ins_encode %{
15461 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
15462 %}
15463 ins_pipe(pipe_slow);
15464 %}
15465
15466 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15467 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15468 %{
15469 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15470 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15471 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15472
15473 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15474 ins_encode %{
15475 __ string_compare($str1$$Register, $str2$$Register,
15476 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15477 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
15478 %}
15479 ins_pipe( pipe_slow );
15480 %}
15481
15482 instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15483 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15484 %{
15485 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15486 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15487 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15488
15489 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15490 ins_encode %{
15491 __ string_compare($str1$$Register, $str2$$Register,
15492 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15493 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
15494 %}
15495 ins_pipe( pipe_slow );
15496 %}
15497
15498 instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15499 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15500 %{
15501 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15502 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15503 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15504
15505 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15506 ins_encode %{
15507 __ string_compare($str1$$Register, $str2$$Register,
15508 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15509 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
15510 %}
15511 ins_pipe( pipe_slow );
15512 %}
15513
15514 instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15515 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15516 %{
15517 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15518 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15519 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15520
15521 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15522 ins_encode %{
15523 __ string_compare($str1$$Register, $str2$$Register,
15524 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15525 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
15526 %}
15527 ins_pipe( pipe_slow );
15528 %}
15529
15530 instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15531 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15532 %{
15533 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15534 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15535 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15536
15537 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15538 ins_encode %{
15539 __ string_compare($str1$$Register, $str2$$Register,
15540 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15541 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
15542 %}
15543 ins_pipe( pipe_slow );
15544 %}
15545
15546 instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
15547 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15548 %{
15549 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15550 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15551 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15552
15553 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15554 ins_encode %{
15555 __ string_compare($str1$$Register, $str2$$Register,
15556 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15557 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
15558 %}
15559 ins_pipe( pipe_slow );
15560 %}
15561
15562 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15563 rax_RegI result, legRegD tmp1, rFlagsReg cr)
15564 %{
15565 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15566 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15567 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15568
15569 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15570 ins_encode %{
15571 __ string_compare($str2$$Register, $str1$$Register,
15572 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15573 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15574 %}
15575 ins_pipe( pipe_slow );
15576 %}
15577
15578 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15579 rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15580 %{
15581 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15582 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15583 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15584
15585 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
15586 ins_encode %{
15587 __ string_compare($str2$$Register, $str1$$Register,
15588 $cnt2$$Register, $cnt1$$Register, $result$$Register,
15589 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15590 %}
15591 ins_pipe( pipe_slow );
15592 %}
15593
15594 // fast search of substring with known size.
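// The constant-size thresholds used below (>= 16 Latin-1 bytes, >= 8 UTF-16 chars)
// correspond to one 128-bit XMM load, so such constant needles never need the
// page-boundary-safe stack load that the general string_indexof path may use.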
15595 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15596 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15597 %{
15598 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15599 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15600 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15601
15602 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15603 ins_encode %{
15604 int icnt2 = (int)$int_cnt2$$constant;
15605 if (icnt2 >= 16) {
15606       // IndexOf for constant substrings with size >= 16 elements,
15607       // which don't need to be loaded through the stack.
15608 __ string_indexofC8($str1$$Register, $str2$$Register,
15609 $cnt1$$Register, $cnt2$$Register,
15610 icnt2, $result$$Register,
15611 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15612 } else {
15613       // Small strings are loaded through the stack if they cross a page boundary.
15614 __ string_indexof($str1$$Register, $str2$$Register,
15615 $cnt1$$Register, $cnt2$$Register,
15616 icnt2, $result$$Register,
15617 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15618 }
15619 %}
15620 ins_pipe( pipe_slow );
15621 %}
15622
15623 // fast search of substring with known size.
15624 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15625 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15626 %{
15627 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15628 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15629 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15630
15631 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15632 ins_encode %{
15633 int icnt2 = (int)$int_cnt2$$constant;
15634 if (icnt2 >= 8) {
15635       // IndexOf for constant substrings with size >= 8 elements,
15636       // which don't need to be loaded through the stack.
15637 __ string_indexofC8($str1$$Register, $str2$$Register,
15638 $cnt1$$Register, $cnt2$$Register,
15639 icnt2, $result$$Register,
15640 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15641 } else {
15642       // Small strings are loaded through the stack if they cross a page boundary.
15643 __ string_indexof($str1$$Register, $str2$$Register,
15644 $cnt1$$Register, $cnt2$$Register,
15645 icnt2, $result$$Register,
15646 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15647 }
15648 %}
15649 ins_pipe( pipe_slow );
15650 %}
15651
15652 // fast search of substring with known size.
15653 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15654 rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15655 %{
15656 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15657 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15658 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15659
15660 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15661 ins_encode %{
15662 int icnt2 = (int)$int_cnt2$$constant;
15663 if (icnt2 >= 8) {
15664       // IndexOf for constant substrings with size >= 8 elements,
15665       // which don't need to be loaded through the stack.
15666 __ string_indexofC8($str1$$Register, $str2$$Register,
15667 $cnt1$$Register, $cnt2$$Register,
15668 icnt2, $result$$Register,
15669 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15670 } else {
15671       // Small strings are loaded through the stack if they cross a page boundary.
15672 __ string_indexof($str1$$Register, $str2$$Register,
15673 $cnt1$$Register, $cnt2$$Register,
15674 icnt2, $result$$Register,
15675 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15676 }
15677 %}
15678 ins_pipe( pipe_slow );
15679 %}
15680
15681 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15682 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15683 %{
15684 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15685 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15686 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15687
15688 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15689 ins_encode %{
15690 __ string_indexof($str1$$Register, $str2$$Register,
15691 $cnt1$$Register, $cnt2$$Register,
15692 (-1), $result$$Register,
15693 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15694 %}
15695 ins_pipe( pipe_slow );
15696 %}
15697
15698 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15699 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15700 %{
15701 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15702 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15703 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15704
15705 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15706 ins_encode %{
15707 __ string_indexof($str1$$Register, $str2$$Register,
15708 $cnt1$$Register, $cnt2$$Register,
15709 (-1), $result$$Register,
15710 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15711 %}
15712 ins_pipe( pipe_slow );
15713 %}
15714
15715 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15716 rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15717 %{
15718 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15719 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15720 effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15721
15722 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
15723 ins_encode %{
15724 __ string_indexof($str1$$Register, $str2$$Register,
15725 $cnt1$$Register, $cnt2$$Register,
15726 (-1), $result$$Register,
15727 $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15728 %}
15729 ins_pipe( pipe_slow );
15730 %}
15731
15732 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15733 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15734 %{
15735 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15736 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15737 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15738 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15739 ins_encode %{
15740 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15741 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15742 %}
15743 ins_pipe( pipe_slow );
15744 %}
15745
15746 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15747 rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15748 %{
15749 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15750 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15751 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15752 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
15753 ins_encode %{
15754 __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15755 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15756 %}
15757 ins_pipe( pipe_slow );
15758 %}
15759
15760 // fast string equals
15761 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15762 legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15763 %{
15764 predicate(!VM_Version::supports_avx512vlbw());
15765 match(Set result (StrEquals (Binary str1 str2) cnt));
15766 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15767
15768 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15769 ins_encode %{
15770 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15771 $cnt$$Register, $result$$Register, $tmp3$$Register,
15772 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15773 %}
15774 ins_pipe( pipe_slow );
15775 %}
15776
15777 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15778 legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15779 %{
15780 predicate(VM_Version::supports_avx512vlbw());
15781 match(Set result (StrEquals (Binary str1 str2) cnt));
15782 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15783
15784 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15785 ins_encode %{
15786 __ arrays_equals(false, $str1$$Register, $str2$$Register,
15787 $cnt$$Register, $result$$Register, $tmp3$$Register,
15788 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15789 %}
15790 ins_pipe( pipe_slow );
15791 %}
15792
15793 // fast array equals
15794 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15795 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15796 %{
15797 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15798 match(Set result (AryEq ary1 ary2));
15799 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15800
15801 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15802 ins_encode %{
15803 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15804 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15805 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15806 %}
15807 ins_pipe( pipe_slow );
15808 %}
15809
15810 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15811 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15812 %{
15813 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15814 match(Set result (AryEq ary1 ary2));
15815 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15816
15817 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15818 ins_encode %{
15819 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15820 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15821 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15822 %}
15823 ins_pipe( pipe_slow );
15824 %}
15825
15826 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15827 legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15828 %{
15829 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15830 match(Set result (AryEq ary1 ary2));
15831 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15832
15833 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15834 ins_encode %{
15835 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15836 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15837 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15838 %}
15839 ins_pipe( pipe_slow );
15840 %}
15841
15842 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15843 legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15844 %{
15845 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15846 match(Set result (AryEq ary1 ary2));
15847 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15848
15849 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15850 ins_encode %{
15851 __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15852 $tmp3$$Register, $result$$Register, $tmp4$$Register,
15853 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15854 %}
15855 ins_pipe( pipe_slow );
15856 %}
15857
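// Vectorized polynomial hash (h = 31*h + element) as used by Arrays.hashCode and
// String.hashCode; requires AVX2 (UseAVX >= 2) and claims a large set of vector
// temporaries, so all of them are declared TEMP and the format notes "KILL all".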
15858 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15859 legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15860 legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15861 legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15862 legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15863 %{
15864 predicate(UseAVX >= 2);
15865 match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15866 effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15867 TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15868 TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15869 USE basic_type, KILL cr);
15870
15871 format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
15872 ins_encode %{
15873 __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15874 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15875 $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15876 $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15877 $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15878 $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15879 $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15880 %}
15881 ins_pipe( pipe_slow );
15882 %}
15883
15884 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15885                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15886 %{
15887 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15888 match(Set result (CountPositives ary1 len));
15889 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15890
15891 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15892 ins_encode %{
15893 __ count_positives($ary1$$Register, $len$$Register,
15894 $result$$Register, $tmp3$$Register,
15895 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15896 %}
15897 ins_pipe( pipe_slow );
15898 %}
15899
15900 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15901                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
15902 %{
15903 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15904 match(Set result (CountPositives ary1 len));
15905 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15906
15907 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
15908 ins_encode %{
15909 __ count_positives($ary1$$Register, $len$$Register,
15910 $result$$Register, $tmp3$$Register,
15911 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15912 %}
15913 ins_pipe( pipe_slow );
15914 %}
15915
15916 // fast char[] to byte[] compression
15917 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15918 legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15919 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15920 match(Set result (StrCompressedCopy src (Binary dst len)));
15921 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15922 USE_KILL len, KILL tmp5, KILL cr);
15923
15924 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15925 ins_encode %{
15926 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15927 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15928 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15929 knoreg, knoreg);
15930 %}
15931 ins_pipe( pipe_slow );
15932 %}
15933
15934 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15935 legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15936 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15937 match(Set result (StrCompressedCopy src (Binary dst len)));
15938 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15939 USE_KILL len, KILL tmp5, KILL cr);
15940
15941 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
15942 ins_encode %{
15943 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15944 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15945 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15946 $ktmp1$$KRegister, $ktmp2$$KRegister);
15947 %}
15948 ins_pipe( pipe_slow );
15949 %}

15950 // fast byte[] to char[] inflation
15951 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15952 legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15953 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15954 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15955 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15956
15957 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15958 ins_encode %{
15959 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15960 $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15961 %}
15962 ins_pipe( pipe_slow );
15963 %}
15964
15965 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15966 legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15967 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15968 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15969 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15970
15971 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15972 ins_encode %{
15973 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15974 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15975 %}
15976 ins_pipe( pipe_slow );
15977 %}
15978
15979 // encode char[] to byte[] in ISO_8859_1
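// Note: the ASCII variant further below reuses the same encode_iso_array()
// assembler routine and only flips its final 'ascii' argument from false to true.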
15980 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15981 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15982 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15983 predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15984 match(Set result (EncodeISOArray src (Binary dst len)));
15985 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15986
15987 format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15988 ins_encode %{
15989 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15990 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15991 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15992 %}
15993 ins_pipe( pipe_slow );
15994 %}
15995
15996 // encode char[] to byte[] in ASCII
15997 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15998 legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15999 rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16000 predicate(((EncodeISOArrayNode*)n)->is_ascii());
16001 match(Set result (EncodeISOArray src (Binary dst len)));
16002 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16003
16004 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16005 ins_encode %{
16006 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16007 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16008 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16009 %}
16010 ins_pipe( pipe_slow );
16011 %}
16012
16013 //----------Overflow Math Instructions-----------------------------------------
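// These patterns back the overflow-checking math intrinsics (e.g. Math.addExact,
// Math.negateExact, Math.multiplyExact): each instruction only produces condition
// codes, and the matching branch then tests the overflow flag.  Forms whose
// add/imul/neg destroys an input mark that input with USE_KILL.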
16014
16015 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16016 %{
16017 match(Set cr (OverflowAddI op1 op2));
16018 effect(DEF cr, USE_KILL op1, USE op2);
16019
16020 format %{ "addl $op1, $op2\t# overflow check int" %}
16021
16022 ins_encode %{
16023 __ addl($op1$$Register, $op2$$Register);
16024 %}
16025 ins_pipe(ialu_reg_reg);
16026 %}
16027
16028 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16029 %{
16030 match(Set cr (OverflowAddI op1 op2));
16031 effect(DEF cr, USE_KILL op1, USE op2);
16032
16033 format %{ "addl $op1, $op2\t# overflow check int" %}
16034
16035 ins_encode %{
16036 __ addl($op1$$Register, $op2$$constant);
16037 %}
16038 ins_pipe(ialu_reg_reg);
16039 %}
16040
16041 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16042 %{
16043 match(Set cr (OverflowAddL op1 op2));
16044 effect(DEF cr, USE_KILL op1, USE op2);
16045
16046 format %{ "addq $op1, $op2\t# overflow check long" %}
16047 ins_encode %{
16048 __ addq($op1$$Register, $op2$$Register);
16049 %}
16050 ins_pipe(ialu_reg_reg);
16051 %}
16052
16053 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16054 %{
16055 match(Set cr (OverflowAddL op1 op2));
16056 effect(DEF cr, USE_KILL op1, USE op2);
16057
16058 format %{ "addq $op1, $op2\t# overflow check long" %}
16059 ins_encode %{
16060 __ addq($op1$$Register, $op2$$constant);
16061 %}
16062 ins_pipe(ialu_reg_reg);
16063 %}
16064
16065 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16066 %{
16067 match(Set cr (OverflowSubI op1 op2));
16068
16069 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16070 ins_encode %{
16071 __ cmpl($op1$$Register, $op2$$Register);
16072 %}
16073 ins_pipe(ialu_reg_reg);
16074 %}
16075
16076 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16077 %{
16078 match(Set cr (OverflowSubI op1 op2));
16079
16080 format %{ "cmpl $op1, $op2\t# overflow check int" %}
16081 ins_encode %{
16082 __ cmpl($op1$$Register, $op2$$constant);
16083 %}
16084 ins_pipe(ialu_reg_reg);
16085 %}
16086
16087 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16088 %{
16089 match(Set cr (OverflowSubL op1 op2));
16090
16091 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16092 ins_encode %{
16093 __ cmpq($op1$$Register, $op2$$Register);
16094 %}
16095 ins_pipe(ialu_reg_reg);
16096 %}
16097
16098 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16099 %{
16100 match(Set cr (OverflowSubL op1 op2));
16101
16102 format %{ "cmpq $op1, $op2\t# overflow check long" %}
16103 ins_encode %{
16104 __ cmpq($op1$$Register, $op2$$constant);
16105 %}
16106 ins_pipe(ialu_reg_reg);
16107 %}
16108
16109 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16110 %{
16111 match(Set cr (OverflowSubI zero op2));
16112 effect(DEF cr, USE_KILL op2);
16113
16114 format %{ "negl $op2\t# overflow check int" %}
16115 ins_encode %{
16116 __ negl($op2$$Register);
16117 %}
16118 ins_pipe(ialu_reg_reg);
16119 %}
16120
16121 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16122 %{
16123 match(Set cr (OverflowSubL zero op2));
16124 effect(DEF cr, USE_KILL op2);
16125
16126 format %{ "negq $op2\t# overflow check long" %}
16127 ins_encode %{
16128 __ negq($op2$$Register);
16129 %}
16130 ins_pipe(ialu_reg_reg);
16131 %}
16132
16133 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16134 %{
16135 match(Set cr (OverflowMulI op1 op2));
16136 effect(DEF cr, USE_KILL op1, USE op2);
16137
16138 format %{ "imull $op1, $op2\t# overflow check int" %}
16139 ins_encode %{
16140 __ imull($op1$$Register, $op2$$Register);
16141 %}
16142 ins_pipe(ialu_reg_reg_alu0);
16143 %}
16144
16145 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16146 %{
16147 match(Set cr (OverflowMulI op1 op2));
16148 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16149
16150 format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
16151 ins_encode %{
16152 __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16153 %}
16154 ins_pipe(ialu_reg_reg_alu0);
16155 %}
16156
16157 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16158 %{
16159 match(Set cr (OverflowMulL op1 op2));
16160 effect(DEF cr, USE_KILL op1, USE op2);
16161
16162 format %{ "imulq $op1, $op2\t# overflow check long" %}
16163 ins_encode %{
16164 __ imulq($op1$$Register, $op2$$Register);
16165 %}
16166 ins_pipe(ialu_reg_reg_alu0);
16167 %}
16168
16169 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16170 %{
16171 match(Set cr (OverflowMulL op1 op2));
16172 effect(DEF cr, TEMP tmp, USE op1, USE op2);
16173
16174 format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
16175 ins_encode %{
16176 __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16177 %}
16178 ins_pipe(ialu_reg_reg_alu0);
16179 %}
16180
16181
16182 //----------Control Flow Instructions------------------------------------------
16183 // Signed compare Instructions
16184
16185 // XXX more variants!!
16186 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16187 %{
16188 match(Set cr (CmpI op1 op2));
16189 effect(DEF cr, USE op1, USE op2);
16190
16191 format %{ "cmpl $op1, $op2" %}
16192 ins_encode %{
16193 __ cmpl($op1$$Register, $op2$$Register);
16194 %}
16195 ins_pipe(ialu_cr_reg_reg);
16196 %}
16197
16198 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16199 %{
16200 match(Set cr (CmpI op1 op2));
16201
16202 format %{ "cmpl $op1, $op2" %}
16203 ins_encode %{
16204 __ cmpl($op1$$Register, $op2$$constant);
16205 %}
16206 ins_pipe(ialu_cr_reg_imm);
16207 %}
16208
16209 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16210 %{
16211 match(Set cr (CmpI op1 (LoadI op2)));
16212
16213 ins_cost(500); // XXX
16214 format %{ "cmpl $op1, $op2" %}
16215 ins_encode %{
16216 __ cmpl($op1$$Register, $op2$$Address);
16217 %}
16218 ins_pipe(ialu_cr_reg_mem);
16219 %}
16220
16221 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16222 %{
16223 match(Set cr (CmpI src zero));
16224
16225 format %{ "testl $src, $src" %}
16226 ins_encode %{
16227 __ testl($src$$Register, $src$$Register);
16228 %}
16229 ins_pipe(ialu_cr_reg_imm);
16230 %}
16231
16232 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16233 %{
16234 match(Set cr (CmpI (AndI src con) zero));
16235
16236 format %{ "testl $src, $con" %}
16237 ins_encode %{
16238 __ testl($src$$Register, $con$$constant);
16239 %}
16240 ins_pipe(ialu_cr_reg_imm);
16241 %}
16242
16243 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16244 %{
16245 match(Set cr (CmpI (AndI src1 src2) zero));
16246
16247 format %{ "testl $src1, $src2" %}
16248 ins_encode %{
16249 __ testl($src1$$Register, $src2$$Register);
16250 %}
16251 ins_pipe(ialu_cr_reg_imm);
16252 %}
16253
16254 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16255 %{
16256 match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16257
16258 format %{ "testl $src, $mem" %}
16259 ins_encode %{
16260 __ testl($src$$Register, $mem$$Address);
16261 %}
16262 ins_pipe(ialu_cr_reg_mem);
16263 %}
16264
16265 // Unsigned compare Instructions; really, same as signed except they
16266 // produce an rFlagsRegU instead of rFlagsReg.
16267 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16268 %{
16269 match(Set cr (CmpU op1 op2));
16270
16271 format %{ "cmpl $op1, $op2\t# unsigned" %}
16272 ins_encode %{
16273 __ cmpl($op1$$Register, $op2$$Register);
16274 %}
16275 ins_pipe(ialu_cr_reg_reg);
16276 %}
16277
16278 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16279 %{
16280 match(Set cr (CmpU op1 op2));
16281
16282 format %{ "cmpl $op1, $op2\t# unsigned" %}
16283 ins_encode %{
16284 __ cmpl($op1$$Register, $op2$$constant);
16285 %}
16286 ins_pipe(ialu_cr_reg_imm);
16287 %}
16288
16289 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16290 %{
16291 match(Set cr (CmpU op1 (LoadI op2)));
16292
16293 ins_cost(500); // XXX
16294 format %{ "cmpl $op1, $op2\t# unsigned" %}
16295 ins_encode %{
16296 __ cmpl($op1$$Register, $op2$$Address);
16297 %}
16298 ins_pipe(ialu_cr_reg_mem);
16299 %}
16300
16301 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16302 %{
16303 match(Set cr (CmpU src zero));
16304
16305 format %{ "testl $src, $src\t# unsigned" %}
16306 ins_encode %{
16307 __ testl($src$$Register, $src$$Register);
16308 %}
16309 ins_pipe(ialu_cr_reg_imm);
16310 %}
16311
16312 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16313 %{
16314 match(Set cr (CmpP op1 op2));
16315
16316 format %{ "cmpq $op1, $op2\t# ptr" %}
16317 ins_encode %{
16318 __ cmpq($op1$$Register, $op2$$Register);
16319 %}
16320 ins_pipe(ialu_cr_reg_reg);
16321 %}
16322
16323 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16324 %{
16325 match(Set cr (CmpP op1 (LoadP op2)));
16326 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16327
16328 ins_cost(500); // XXX
16329 format %{ "cmpq $op1, $op2\t# ptr" %}
16330 ins_encode %{
16331 __ cmpq($op1$$Register, $op2$$Address);
16332 %}
16333 ins_pipe(ialu_cr_reg_mem);
16334 %}
16335
16336 // XXX this is generalized by compP_rReg_mem???
16337 // Compare raw pointer (used in out-of-heap check).
16338 // Only works because non-oop pointers must be raw pointers
16339 // and raw pointers have no anti-dependencies.
16340 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16341 %{
16342 predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16343 n->in(2)->as_Load()->barrier_data() == 0);
16344 match(Set cr (CmpP op1 (LoadP op2)));
16345
16346 format %{ "cmpq $op1, $op2\t# raw ptr" %}
16347 ins_encode %{
16348 __ cmpq($op1$$Register, $op2$$Address);
16349 %}
16350 ins_pipe(ialu_cr_reg_mem);
16351 %}
16352
16353 // This will generate a signed flags result. This should be OK since
16354 // any compare to a zero should be eq/neq.
16355 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16356 %{
16357 match(Set cr (CmpP src zero));
16358
16359 format %{ "testq $src, $src\t# ptr" %}
16360 ins_encode %{
16361 __ testq($src$$Register, $src$$Register);
16362 %}
16363 ins_pipe(ialu_cr_reg_imm);
16364 %}
16365
16366 // This will generate a signed flags result. This should be OK since
16367 // any compare to a zero should be eq/neq.
16368 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16369 %{
16370 predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16371 n->in(1)->as_Load()->barrier_data() == 0);
16372 match(Set cr (CmpP (LoadP op) zero));
16373
16374 ins_cost(500); // XXX
16375 format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
16376 ins_encode %{
16377 __ testq($op$$Address, 0xFFFFFFFF);
16378 %}
16379 ins_pipe(ialu_cr_reg_imm);
16380 %}
16381
16382 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16383 %{
16384 predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16385 n->in(1)->as_Load()->barrier_data() == 0);
16386 match(Set cr (CmpP (LoadP mem) zero));
16387
16388 format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
16389 ins_encode %{
16390 __ cmpq(r12, $mem$$Address);
16391 %}
16392 ins_pipe(ialu_cr_reg_mem);
16393 %}
16394
16395 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16396 %{
16397 match(Set cr (CmpN op1 op2));
16398
16399 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16400 ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16401 ins_pipe(ialu_cr_reg_reg);
16402 %}
16403
16404 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16405 %{
16406 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16407 match(Set cr (CmpN src (LoadN mem)));
16408
16409 format %{ "cmpl $src, $mem\t# compressed ptr" %}
16410 ins_encode %{
16411 __ cmpl($src$$Register, $mem$$Address);
16412 %}
16413 ins_pipe(ialu_cr_reg_mem);
16414 %}
16415
16416 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16417 match(Set cr (CmpN op1 op2));
16418
16419 format %{ "cmpl $op1, $op2\t# compressed ptr" %}
16420 ins_encode %{
16421 __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16422 %}
16423 ins_pipe(ialu_cr_reg_imm);
16424 %}
16425
16426 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16427 %{
16428 predicate(n->in(2)->as_Load()->barrier_data() == 0);
16429 match(Set cr (CmpN src (LoadN mem)));
16430
16431 format %{ "cmpl $mem, $src\t# compressed ptr" %}
16432 ins_encode %{
16433 __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16434 %}
16435 ins_pipe(ialu_cr_reg_mem);
16436 %}
16437
16438 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16439 match(Set cr (CmpN op1 op2));
16440
16441 format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
16442 ins_encode %{
16443 __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16444 %}
16445 ins_pipe(ialu_cr_reg_imm);
16446 %}
16447
16448 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16449 %{
16450 predicate(!UseCompactObjectHeaders);
16451 match(Set cr (CmpN src (LoadNKlass mem)));
16452
16453 format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
16454 ins_encode %{
16455 __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16456 %}
16457 ins_pipe(ialu_cr_reg_mem);
16458 %}
16459
16460 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16461 match(Set cr (CmpN src zero));
16462
16463 format %{ "testl $src, $src\t# compressed ptr" %}
16464 ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16465 ins_pipe(ialu_cr_reg_imm);
16466 %}
16467
16468 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16469 %{
16470 predicate(CompressedOops::base() != nullptr &&
16471 n->in(1)->as_Load()->barrier_data() == 0);
16472 match(Set cr (CmpN (LoadN mem) zero));
16473
16474 ins_cost(500); // XXX
16475 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
16476 ins_encode %{
16477 __ cmpl($mem$$Address, (int)0xFFFFFFFF);
16478 %}
16479 ins_pipe(ialu_cr_reg_mem);
16480 %}
16481
16482 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16483 %{
16484 predicate(CompressedOops::base() == nullptr &&
16485 n->in(1)->as_Load()->barrier_data() == 0);
16486 match(Set cr (CmpN (LoadN mem) zero));
16487
16488 format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16489 ins_encode %{
16490 __ cmpl(r12, $mem$$Address);
16491 %}
16492 ins_pipe(ialu_cr_reg_mem);
16493 %}
16494
16495 // Yanked all unsigned pointer compare operations.
16496 // Pointer compares are done with CmpP which is already unsigned.
16497
16498 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16499 %{
16500 match(Set cr (CmpL op1 op2));
16501
16502 format %{ "cmpq $op1, $op2" %}
16503 ins_encode %{
16504 __ cmpq($op1$$Register, $op2$$Register);
16505 %}
16506 ins_pipe(ialu_cr_reg_reg);
16507 %}
16508
16509 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16510 %{
16511 match(Set cr (CmpL op1 op2));
16512
16513 format %{ "cmpq $op1, $op2" %}
16514 ins_encode %{
16515 __ cmpq($op1$$Register, $op2$$constant);
16516 %}
16517 ins_pipe(ialu_cr_reg_imm);
16518 %}
16519
16520 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16521 %{
16522 match(Set cr (CmpL op1 (LoadL op2)));
16523
16524 format %{ "cmpq $op1, $op2" %}
16525 ins_encode %{
16526 __ cmpq($op1$$Register, $op2$$Address);
16527 %}
16528 ins_pipe(ialu_cr_reg_mem);
16529 %}
16530
16531 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16532 %{
16533 match(Set cr (CmpL src zero));
16534
16535 format %{ "testq $src, $src" %}
16536 ins_encode %{
16537 __ testq($src$$Register, $src$$Register);
16538 %}
16539 ins_pipe(ialu_cr_reg_imm);
16540 %}
16541
16542 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16543 %{
16544 match(Set cr (CmpL (AndL src con) zero));
16545
16546 format %{ "testq $src, $con\t# long" %}
16547 ins_encode %{
16548 __ testq($src$$Register, $con$$constant);
16549 %}
16550 ins_pipe(ialu_cr_reg_imm);
16551 %}
16552
16553 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16554 %{
16555 match(Set cr (CmpL (AndL src1 src2) zero));
16556
16557 format %{ "testq $src1, $src2\t# long" %}
16558 ins_encode %{
16559 __ testq($src1$$Register, $src2$$Register);
16560 %}
16561 ins_pipe(ialu_cr_reg_imm);
16562 %}
16563
16564 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16565 %{
16566 match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16567
16568 format %{ "testq $src, $mem" %}
16569 ins_encode %{
16570 __ testq($src$$Register, $mem$$Address);
16571 %}
16572 ins_pipe(ialu_cr_reg_mem);
16573 %}
16574
16575 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16576 %{
16577 match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16578
16579 format %{ "testq $src, $mem" %}
16580 ins_encode %{
16581 __ testq($src$$Register, $mem$$Address);
16582 %}
16583 ins_pipe(ialu_cr_reg_mem);
16584 %}
16585
16586 // Manifest a CmpU result in an integer register. Very painful.
16587 // This is the test to avoid.
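// The sequence produces the usual -1/0/+1 three-way result: load -1, branch past
// the fix-up when src1 is below src2, otherwise setcc turns the equal/not-equal
// outcome into 0 or 1.  The signed (CmpL3) and unsigned long (CmpUL3) variants
// below follow the same shape.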
16588 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16589 %{
16590 match(Set dst (CmpU3 src1 src2));
16591 effect(KILL flags);
16592
16593 ins_cost(275); // XXX
16594   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16595 "movl $dst, -1\n\t"
16596 "jb,u done\n\t"
16597             "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16598 "done:" %}
16599 ins_encode %{
16600 Label done;
16601 __ cmpl($src1$$Register, $src2$$Register);
16602 __ movl($dst$$Register, -1);
16603 __ jccb(Assembler::below, done);
16604 __ setcc(Assembler::notZero, $dst$$Register);
16605 __ bind(done);
16606 %}
16607 ins_pipe(pipe_slow);
16608 %}
16609
16610 // Manifest a CmpL result in an integer register. Very painful.
16611 // This is the test to avoid.
16612 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16613 %{
16614 match(Set dst (CmpL3 src1 src2));
16615 effect(KILL flags);
16616
16617 ins_cost(275); // XXX
16618 format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
16619 "movl $dst, -1\n\t"
16620 "jl,s done\n\t"
16621             "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16622 "done:" %}
16623 ins_encode %{
16624 Label done;
16625 __ cmpq($src1$$Register, $src2$$Register);
16626 __ movl($dst$$Register, -1);
16627 __ jccb(Assembler::less, done);
16628 __ setcc(Assembler::notZero, $dst$$Register);
16629 __ bind(done);
16630 %}
16631 ins_pipe(pipe_slow);
16632 %}
16633
16634 // Manifest a CmpUL result in an integer register. Very painful.
16635 // This is the test to avoid.
16636 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16637 %{
16638 match(Set dst (CmpUL3 src1 src2));
16639 effect(KILL flags);
16640
16641 ins_cost(275); // XXX
16642   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16643 "movl $dst, -1\n\t"
16644 "jb,u done\n\t"
16645             "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
16646 "done:" %}
16647 ins_encode %{
16648 Label done;
16649 __ cmpq($src1$$Register, $src2$$Register);
16650 __ movl($dst$$Register, -1);
16651 __ jccb(Assembler::below, done);
16652 __ setcc(Assembler::notZero, $dst$$Register);
16653 __ bind(done);
16654 %}
16655 ins_pipe(pipe_slow);
16656 %}
16657
16658 // Unsigned long compare Instructions; really, same as signed long except they
16659 // produce an rFlagsRegU instead of rFlagsReg.
16660 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16661 %{
16662 match(Set cr (CmpUL op1 op2));
16663
16664 format %{ "cmpq $op1, $op2\t# unsigned" %}
16665 ins_encode %{
16666 __ cmpq($op1$$Register, $op2$$Register);
16667 %}
16668 ins_pipe(ialu_cr_reg_reg);
16669 %}
16670
16671 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16672 %{
16673 match(Set cr (CmpUL op1 op2));
16674
16675 format %{ "cmpq $op1, $op2\t# unsigned" %}
16676 ins_encode %{
16677 __ cmpq($op1$$Register, $op2$$constant);
16678 %}
16679 ins_pipe(ialu_cr_reg_imm);
16680 %}
16681
16682 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16683 %{
16684 match(Set cr (CmpUL op1 (LoadL op2)));
16685
16686 format %{ "cmpq $op1, $op2\t# unsigned" %}
16687 ins_encode %{
16688 __ cmpq($op1$$Register, $op2$$Address);
16689 %}
16690 ins_pipe(ialu_cr_reg_mem);
16691 %}
16692
16693 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16694 %{
16695 match(Set cr (CmpUL src zero));
16696
16697 format %{ "testq $src, $src\t# unsigned" %}
16698 ins_encode %{
16699 __ testq($src$$Register, $src$$Register);
16700 %}
16701 ins_pipe(ialu_cr_reg_imm);
16702 %}
16703
16704 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16705 %{
16706 match(Set cr (CmpI (LoadB mem) imm));
16707
16708 ins_cost(125);
16709 format %{ "cmpb $mem, $imm" %}
16710 ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16711 ins_pipe(ialu_cr_reg_mem);
16712 %}
16713
16714 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16715 %{
16716 match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16717
16718 ins_cost(125);
16719 format %{ "testb $mem, $imm\t# ubyte" %}
16720 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16721 ins_pipe(ialu_cr_reg_mem);
16722 %}
16723
16724 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16725 %{
16726 match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16727
16728 ins_cost(125);
16729 format %{ "testb $mem, $imm\t# byte" %}
16730 ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16731 ins_pipe(ialu_cr_reg_mem);
16732 %}
16733
16734 //----------Max and Min--------------------------------------------------------
16735 // Min Instructions
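// MinI/MaxI have no dedicated instruction; each expands into a flag-setting
// compare followed by a conditional move (cmov greater for min, cmov less for
// max), with separate NDD forms when APX is available.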
16736
16737 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16738 %{
16739 predicate(!UseAPX);
16740 effect(USE_DEF dst, USE src, USE cr);
16741
16742 format %{ "cmovlgt $dst, $src\t# min" %}
16743 ins_encode %{
16744 __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16745 %}
16746 ins_pipe(pipe_cmov_reg);
16747 %}
16748
16749 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16750 %{
16751 predicate(UseAPX);
16752 effect(DEF dst, USE src1, USE src2, USE cr);
16753
16754 format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16755 ins_encode %{
16756 __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16757 %}
16758 ins_pipe(pipe_cmov_reg);
16759 %}
16760
16761 instruct minI_rReg(rRegI dst, rRegI src)
16762 %{
16763 predicate(!UseAPX);
16764 match(Set dst (MinI dst src));
16765
16766 ins_cost(200);
16767 expand %{
16768 rFlagsReg cr;
16769 compI_rReg(cr, dst, src);
16770 cmovI_reg_g(dst, src, cr);
16771 %}
16772 %}
16773
16774 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16775 %{
16776 predicate(UseAPX);
16777 match(Set dst (MinI src1 src2));
16778 effect(DEF dst, USE src1, USE src2);
16779 flag(PD::Flag_ndd_demotable_opr1);
16780
16781 ins_cost(200);
16782 expand %{
16783 rFlagsReg cr;
16784 compI_rReg(cr, src1, src2);
16785 cmovI_reg_g_ndd(dst, src1, src2, cr);
16786 %}
16787 %}
16788
16789 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16790 %{
16791 predicate(!UseAPX);
16792 effect(USE_DEF dst, USE src, USE cr);
16793
16794 format %{ "cmovllt $dst, $src\t# max" %}
16795 ins_encode %{
16796 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16797 %}
16798 ins_pipe(pipe_cmov_reg);
16799 %}
16800
16801 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16802 %{
16803 predicate(UseAPX);
16804 effect(DEF dst, USE src1, USE src2, USE cr);
16805
16806 format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16807 ins_encode %{
16808 __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16809 %}
16810 ins_pipe(pipe_cmov_reg);
16811 %}
16812
16813 instruct maxI_rReg(rRegI dst, rRegI src)
16814 %{
16815 predicate(!UseAPX);
16816 match(Set dst (MaxI dst src));
16817
16818 ins_cost(200);
16819 expand %{
16820 rFlagsReg cr;
16821 compI_rReg(cr, dst, src);
16822 cmovI_reg_l(dst, src, cr);
16823 %}
16824 %}
16825
16826 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16827 %{
16828 predicate(UseAPX);
16829 match(Set dst (MaxI src1 src2));
16830 effect(DEF dst, USE src1, USE src2);
16831 flag(PD::Flag_ndd_demotable_opr1);
16832
16833 ins_cost(200);
16834 expand %{
16835 rFlagsReg cr;
16836 compI_rReg(cr, src1, src2);
16837 cmovI_reg_l_ndd(dst, src1, src2, cr);
16838 %}
16839 %}
16840
16841 // ============================================================================
16842 // Branch Instructions
16843
16844 // Jump Direct - Label defines a relative address from JMP+1
16845 instruct jmpDir(label labl)
16846 %{
16847 match(Goto);
16848 effect(USE labl);
16849
16850 ins_cost(300);
16851 format %{ "jmp $labl" %}
16852 size(5);
16853 ins_encode %{
16854 Label* L = $labl$$label;
16855 __ jmp(*L, false); // Always long jump
16856 %}
16857 ins_pipe(pipe_jmp);
16858 %}
16859
16860 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16861 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16862 %{
16863 match(If cop cr);
16864 effect(USE labl);
16865
16866 ins_cost(300);
16867 format %{ "j$cop $labl" %}
16868 size(6);
16869 ins_encode %{
16870 Label* L = $labl$$label;
16871 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16872 %}
16873 ins_pipe(pipe_jcc);
16874 %}
16875
16876 // Jump Direct Conditional - Label defines a relative address from Jcc+1
16877 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16878 %{
16879 match(CountedLoopEnd cop cr);
16880 effect(USE labl);
16881
16882 ins_cost(300);
16883 format %{ "j$cop $labl\t# loop end" %}
16884 size(6);
16885 ins_encode %{
16886 Label* L = $labl$$label;
16887 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16888 %}
16889 ins_pipe(pipe_jcc);
16890 %}
16891
16892 // Jump Direct Conditional - using unsigned comparison
16893 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16894 match(If cop cmp);
16895 effect(USE labl);
16896
16897 ins_cost(300);
16898 format %{ "j$cop,u $labl" %}
16899 size(6);
16900 ins_encode %{
16901 Label* L = $labl$$label;
16902 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16903 %}
16904 ins_pipe(pipe_jcc);
16905 %}
16906
16907 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16908 match(If cop cmp);
16909 effect(USE labl);
16910
16911 ins_cost(200);
16912 format %{ "j$cop,u $labl" %}
16913 size(6);
16914 ins_encode %{
16915 Label* L = $labl$$label;
16916 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16917 %}
16918 ins_pipe(pipe_jcc);
16919 %}
16920
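// cmpOpUCF2 covers float/double equality tests on possibly-unordered inputs:
// ucomiss/ucomisd set the parity flag for a NaN operand, so the extra jp branch
// steers the unordered case to the correct target before the eq/ne test.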
16921 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16922 match(If cop cmp);
16923 effect(USE labl);
16924
16925 ins_cost(200);
16926 format %{ $$template
16927 if ($cop$$cmpcode == Assembler::notEqual) {
16928 $$emit$$"jp,u $labl\n\t"
16929 $$emit$$"j$cop,u $labl"
16930 } else {
16931 $$emit$$"jp,u done\n\t"
16932 $$emit$$"j$cop,u $labl\n\t"
16933 $$emit$$"done:"
16934 }
16935 %}
16936 ins_encode %{
16937 Label* l = $labl$$label;
16938 if ($cop$$cmpcode == Assembler::notEqual) {
16939 __ jcc(Assembler::parity, *l, false);
16940 __ jcc(Assembler::notEqual, *l, false);
16941 } else if ($cop$$cmpcode == Assembler::equal) {
16942 Label done;
16943 __ jccb(Assembler::parity, done);
16944 __ jcc(Assembler::equal, *l, false);
16945 __ bind(done);
16946 } else {
16947 ShouldNotReachHere();
16948 }
16949 %}
16950 ins_pipe(pipe_jcc);
16951 %}
16952
16953 // Jump Direct Conditional - using signed and unsigned comparison
16954 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16955 match(If cop cmp);
16956 effect(USE labl);
16957
16958 ins_cost(200);
16959 format %{ "j$cop,su $labl" %}
16960 size(6);
16961 ins_encode %{
16962 Label* L = $labl$$label;
16963 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16964 %}
16965 ins_pipe(pipe_jcc);
16966 %}
16967
16968 // ============================================================================
16969 // The second, slow half of a subtype check.  Scan the subklass's secondary
16970 // superclass array for an instance of the superklass.  Set a hidden
16971 // internal cache on a hit (the cache is checked with exposed code in
16972 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16973 // encoding ALSO sets flags.
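// Note that $result is RDI, which doubles as the scan pointer of the repne scasq
// loop; a hit therefore has to clear it explicitly (the xorptr below), while a
// miss branches past the clearing and leaves it non-zero.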
16974
16975 instruct partialSubtypeCheck(rdi_RegP result,
16976 rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16977 rFlagsReg cr)
16978 %{
16979 match(Set result (PartialSubtypeCheck sub super));
16980 predicate(!UseSecondarySupersTable);
16981 effect(KILL rcx, KILL cr);
16982
16983 ins_cost(1100); // slightly larger than the next version
16984 format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16985 "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16986 "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16987 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16988 "jne,s miss\t\t# Missed: rdi not-zero\n\t"
16989 "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16990                 "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
16991 "miss:\t" %}
16992
16993 ins_encode %{
16994 Label miss;
16995 // NB: Callers may assume that, when $result is a valid register,
16996 // check_klass_subtype_slow_path_linear sets it to a nonzero
16997 // value.
16998 __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
16999 $rcx$$Register, $result$$Register,
17000 nullptr, &miss,
17001 /*set_cond_codes:*/ true);
17002 __ xorptr($result$$Register, $result$$Register);
17003 __ bind(miss);
17004 %}
17005
17006 ins_pipe(pipe_slow);
17007 %}
17008
17009 // ============================================================================
17010 // Two versions of hashtable-based partialSubtypeCheck, both used when
17011 // we need to search for a super class in the secondary supers array.
17012 // The first is used when we don't know _a priori_ the class being
17013 // searched for.  The second, far more common, is used when we do know:
17014 // for instanceof, checkcast, and any other case where C2 can determine
17015 // the superclass by constant propagation.
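// In the constant-super case the superclass's hash slot is known at compile
// time; depending on InlineSecondarySupersTest the lookup is either inlined or
// handed to a per-slot stub (see the encoding below).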
17016
17017 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17018 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17019 rFlagsReg cr)
17020 %{
17021 match(Set result (PartialSubtypeCheck sub super));
17022 predicate(UseSecondarySupersTable);
17023 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17024
17025 ins_cost(1000);
17026 format %{ "partialSubtypeCheck $result, $sub, $super" %}
17027
17028 ins_encode %{
17029 __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17030 $temp3$$Register, $temp4$$Register, $result$$Register);
17031 %}
17032
17033 ins_pipe(pipe_slow);
17034 %}
17035
17036 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17037 rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17038 rFlagsReg cr)
17039 %{
17040 match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17041 predicate(UseSecondarySupersTable);
17042 effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17043
17044 ins_cost(700); // smaller than the next version
17045 format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17046
17047 ins_encode %{
17048 u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17049 if (InlineSecondarySupersTest) {
17050 __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17051 $temp3$$Register, $temp4$$Register, $result$$Register,
17052 super_klass_slot);
17053 } else {
17054 __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17055 }
17056 %}
17057
17058 ins_pipe(pipe_slow);
17059 %}
17060
17061 // ============================================================================
17062 // Branch Instructions -- short offset versions
17063 //
17064 // These instructions are used to replace jumps with a long offset (the default
17065 // match) by jumps with a shorter offset.  They are all tagged with the
17066 // ins_short_branch attribute, which causes the ADLC to suppress the match
17067 // rules during general matching.  Instead, the ADLC generates a conversion
17068 // method in the MachNode which can be used to replace the long variant with
17069 // the shorter variant in place.  The compiler decides whether a short branch
17070 // can be used via the is_short_branch_offset() predicate in the
17071 // machine-specific code section of this file.
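// Encoding sizes match the size() declarations: the short forms are 2 bytes
// (opcode + rel8), while a long jmp is 5 bytes (0xE9 + rel32) and a long Jcc is
// 6 bytes (0x0F 0x8x + rel32).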
17072
17073 // Jump Direct - Label defines a relative address from JMP+1
17074 instruct jmpDir_short(label labl) %{
17075 match(Goto);
17076 effect(USE labl);
17077
17078 ins_cost(300);
17079 format %{ "jmp,s $labl" %}
17080 size(2);
17081 ins_encode %{
17082 Label* L = $labl$$label;
17083 __ jmpb(*L);
17084 %}
17085 ins_pipe(pipe_jmp);
17086 ins_short_branch(1);
17087 %}
17088
17089 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17090 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17091 match(If cop cr);
17092 effect(USE labl);
17093
17094 ins_cost(300);
17095 format %{ "j$cop,s $labl" %}
17096 size(2);
17097 ins_encode %{
17098 Label* L = $labl$$label;
17099 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17100 %}
17101 ins_pipe(pipe_jcc);
17102 ins_short_branch(1);
17103 %}
17104
17105 // Jump Direct Conditional - Label defines a relative address from Jcc+1
17106 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17107 match(CountedLoopEnd cop cr);
17108 effect(USE labl);
17109
17110 ins_cost(300);
17111 format %{ "j$cop,s $labl\t# loop end" %}
17112 size(2);
17113 ins_encode %{
17114 Label* L = $labl$$label;
17115 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17116 %}
17117 ins_pipe(pipe_jcc);
17118 ins_short_branch(1);
17119 %}
17120
17121 // Jump Direct Conditional - using unsigned comparison
17122 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17123 match(If cop cmp);
17124 effect(USE labl);
17125
17126 ins_cost(300);
17127 format %{ "j$cop,us $labl" %}
17128 size(2);
17129 ins_encode %{
17130 Label* L = $labl$$label;
17131 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17132 %}
17133 ins_pipe(pipe_jcc);
17134 ins_short_branch(1);
17135 %}
17136
17137 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17138 match(If cop cmp);
17139 effect(USE labl);
17140
17141 ins_cost(300);
17142 format %{ "j$cop,us $labl" %}
17143 size(2);
17144 ins_encode %{
17145 Label* L = $labl$$label;
17146 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17147 %}
17148 ins_pipe(pipe_jcc);
17149 ins_short_branch(1);
17150 %}
17151
17152 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17153 match(If cop cmp);
17154 effect(USE labl);
17155
17156 ins_cost(300);
17157 format %{ $$template
17158 if ($cop$$cmpcode == Assembler::notEqual) {
17159 $$emit$$"jp,u,s $labl\n\t"
17160 $$emit$$"j$cop,u,s $labl"
17161 } else {
17162 $$emit$$"jp,u,s done\n\t"
17163 $$emit$$"j$cop,u,s $labl\n\t"
17164 $$emit$$"done:"
17165 }
17166 %}
17167 size(4);
17168 ins_encode %{
17169 Label* l = $labl$$label;
17170 if ($cop$$cmpcode == Assembler::notEqual) {
17171 __ jccb(Assembler::parity, *l);
17172 __ jccb(Assembler::notEqual, *l);
17173 } else if ($cop$$cmpcode == Assembler::equal) {
17174 Label done;
17175 __ jccb(Assembler::parity, done);
17176 __ jccb(Assembler::equal, *l);
17177 __ bind(done);
17178 } else {
17179 ShouldNotReachHere();
17180 }
17181 %}
17182 ins_pipe(pipe_jcc);
17183 ins_short_branch(1);
17184 %}
17185
17186 // Jump Direct Conditional - using signed and unsigned comparison
17187 instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
17188 match(If cop cmp);
17189 effect(USE labl);
17190
17191 ins_cost(300);
17192 format %{ "j$cop,sus $labl" %}
17193 size(2);
17194 ins_encode %{
17195 Label* L = $labl$$label;
17196 __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17197 %}
17198 ins_pipe(pipe_jcc);
17199 ins_short_branch(1);
17200 %}
17201
17202 // ============================================================================
17203 // inlined locking and unlocking
17204
17205 instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
17206 match(Set cr (FastLock object box));
17207 effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
17208 ins_cost(300);
17209 format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
17210 ins_encode %{
17211 __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17212 %}
17213 ins_pipe(pipe_slow);
17214 %}
17215
17216 instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
17217 match(Set cr (FastUnlock object rax_reg));
17218 effect(TEMP tmp, USE_KILL rax_reg);
17219 ins_cost(300);
17220 format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
17221 ins_encode %{
17222 __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
17223 %}
17224 ins_pipe(pipe_slow);
17225 %}
17226
17227
17228 // ============================================================================
17229 // Safepoint Instructions
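// Descriptive note: $poll holds the thread-local polling page address, loaded
// from the current JavaThread elsewhere in the graph. While a safepoint or
// handshake is armed that page is protected, so the memory-touching testl
// below faults and the signal handler parks the thread in the safepoint code
// (the arming mechanism itself lives in SafepointMechanism).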
17230 instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
17231 %{
17232 match(SafePoint poll);
17233 effect(KILL cr, USE poll);
17234
17235 format %{ "testl rax, [$poll]\t"
17236 "# Safepoint: poll for GC" %}
17237 ins_cost(125);
17238 ins_encode %{
17239 __ relocate(relocInfo::poll_type);
17240 address pre_pc = __ pc();
17241 __ testl(rax, Address($poll$$Register, 0));
17242 assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
17243 %}
17244 ins_pipe(ialu_reg_mem);
17245 %}
17246
17247 instruct mask_all_evexL(kReg dst, rRegL src) %{
17248 match(Set dst (MaskAll src));
17249 format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
17250 ins_encode %{
17251 int mask_len = Matcher::vector_length(this);
17252 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
17253 %}
17254 ins_pipe( pipe_slow );
17255 %}
17256
17257 instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
17258 predicate(Matcher::vector_length(n) > 32);
17259 match(Set dst (MaskAll src));
17260 effect(TEMP tmp);
17261 format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
17262 ins_encode %{
17263 int mask_len = Matcher::vector_length(this);
17264 __ movslq($tmp$$Register, $src$$Register);
17265 __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
17266 %}
17267 ins_pipe( pipe_slow );
17268 %}
17269
17270 // ============================================================================
17271 // Procedure Call/Return Instructions
17272 // Call Java Static Instruction
17273 // Note: If this code changes, the corresponding ret_addr_offset() and
17274 // compute_padding() functions will have to be adjusted.
17275 instruct CallStaticJavaDirect(method meth) %{
17276 match(CallStaticJava);
17277 effect(USE meth);
17278
17279 ins_cost(300);
17280 format %{ "call,static " %}
17281 opcode(0xE8); /* E8 cd */
17282 ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
17283 ins_pipe(pipe_slow);
17284 ins_alignment(4);
17285 %}
17286
17287 // Call Java Dynamic Instruction
17288 // Note: If this code changes, the corresponding ret_addr_offset() and
17289 // compute_padding() functions will have to be adjusted.
17290 instruct CallDynamicJavaDirect(method meth)
17291 %{
17292 match(CallDynamicJava);
17293 effect(USE meth);
17294
17295 ins_cost(300);
17296 format %{ "movq rax, #Universe::non_oop_word()\n\t"
17297 "call,dynamic " %}
17298 ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
17299 ins_pipe(pipe_slow);
17300 ins_alignment(4);
17301 %}
17302
17303 // Call Runtime Instruction
17304 instruct CallRuntimeDirect(method meth)
17305 %{
17306 match(CallRuntime);
17307 effect(USE meth);
17308
17309 ins_cost(300);
17310 format %{ "call,runtime " %}
17311 ins_encode(clear_avx, Java_To_Runtime(meth));
17312 ins_pipe(pipe_slow);
17313 %}
17314
17315 // Call runtime without safepoint
17316 instruct CallLeafDirect(method meth)
17317 %{
17318 match(CallLeaf);
17319 effect(USE meth);
17320
17321 ins_cost(300);
17322 format %{ "call_leaf,runtime " %}
17323 ins_encode(clear_avx, Java_To_Runtime(meth));
17324 ins_pipe(pipe_slow);
17325 %}
17326
17327 // Call runtime without safepoint and with vector arguments
17328 instruct CallLeafDirectVector(method meth)
17329 %{
17330 match(CallLeafVector);
17331 effect(USE meth);
17332
17333 ins_cost(300);
17334 format %{ "call_leaf,vector " %}
17335 ins_encode(Java_To_Runtime(meth));
17336 ins_pipe(pipe_slow);
17337 %}
17338
17339 // Call runtime without safepoint
17340 instruct CallLeafNoFPDirect(method meth)
17341 %{
17342 match(CallLeafNoFP);
17343 effect(USE meth);
17344
17345 ins_cost(300);
17346 format %{ "call_leaf_nofp,runtime " %}
17347 ins_encode(clear_avx, Java_To_Runtime(meth));
17348 ins_pipe(pipe_slow);
17349 %}
17350
17351 // Return Instruction
17352 // Remove the return address & jump to it.
17353 // Notice: We always emit a nop after a ret to make sure there is room
17354 // for safepoint patching
17355 instruct Ret()
17356 %{
17357 match(Return);
17358
17359 format %{ "ret" %}
17360 ins_encode %{
17361 __ ret(0);
17362 %}
17363 ins_pipe(pipe_jmp);
17364 %}
17365
17366 // Tail Call; Jump from runtime stub to Java code.
17367 // Also known as an 'interprocedural jump'.
17368 // Target of jump will eventually return to caller.
17369 // TailJump below removes the return address.
17370 // Don't use rbp for 'jump_target' because a MachEpilogNode has already been
17371 // emitted just above the TailCall which has reset rbp to the caller state.
17372 instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
17373 %{
17374 match(TailCall jump_target method_ptr);
17375
17376 ins_cost(300);
17377 format %{ "jmp $jump_target\t# rbx holds method" %}
17378 ins_encode %{
17379 __ jmp($jump_target$$Register);
17380 %}
17381 ins_pipe(pipe_jmp);
17382 %}
17383
17384 // Tail Jump; remove the return address; jump to target.
17385 // TailCall above leaves the return address around.
17386 instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
17387 %{
17388 match(TailJump jump_target ex_oop);
17389
17390 ins_cost(300);
17391 format %{ "popq rdx\t# pop return address\n\t"
17392 "jmp $jump_target" %}
17393 ins_encode %{
17394 __ popq(as_Register(RDX_enc));
17395 __ jmp($jump_target$$Register);
17396 %}
17397 ins_pipe(pipe_jmp);
17398 %}
17399
17400 // Forward exception.
17401 instruct ForwardExceptionjmp()
17402 %{
17403 match(ForwardException);
17404
17405 format %{ "jmp forward_exception_stub" %}
17406 ins_encode %{
17407 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
17408 %}
17409 ins_pipe(pipe_jmp);
17410 %}
17411
17412 // Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is set up
17414 // just prior to jumping to this handler. No code emitted.
17415 instruct CreateException(rax_RegP ex_oop)
17416 %{
17417 match(Set ex_oop (CreateEx));
17418
17419 size(0);
17420 // use the following format syntax
17421 format %{ "# exception oop is in rax; no code emitted" %}
17422 ins_encode();
17423 ins_pipe(empty);
17424 %}
17425
17426 // Rethrow exception:
17427 // The exception oop will come in the first argument position.
17428 // Then JUMP (not call) to the rethrow stub code.
17429 instruct RethrowException()
17430 %{
17431 match(Rethrow);
17432
17433 // use the following format syntax
17434 format %{ "jmp rethrow_stub" %}
17435 ins_encode %{
17436 __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
17437 %}
17438 ins_pipe(pipe_jmp);
17439 %}
17440
17441 // ============================================================================
17442 // This name is KNOWN by the ADLC and cannot be changed.
17443 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
17444 // for this guy.
17445 instruct tlsLoadP(r15_RegP dst) %{
17446 match(Set dst (ThreadLocal));
17447 effect(DEF dst);
17448
17449 size(0);
17450 format %{ "# TLS is in R15" %}
17451 ins_encode( /*empty encoding*/ );
17452 ins_pipe(ialu_reg_reg);
17453 %}
17454
17455 instruct addF_reg(regF dst, regF src) %{
17456 predicate(UseAVX == 0);
17457 match(Set dst (AddF dst src));
17458
17459 format %{ "addss $dst, $src" %}
17460 ins_cost(150);
17461 ins_encode %{
17462 __ addss($dst$$XMMRegister, $src$$XMMRegister);
17463 %}
17464 ins_pipe(pipe_slow);
17465 %}
17466
17467 instruct addF_mem(regF dst, memory src) %{
17468 predicate(UseAVX == 0);
17469 match(Set dst (AddF dst (LoadF src)));
17470
17471 format %{ "addss $dst, $src" %}
17472 ins_cost(150);
17473 ins_encode %{
17474 __ addss($dst$$XMMRegister, $src$$Address);
17475 %}
17476 ins_pipe(pipe_slow);
17477 %}
17478
17479 instruct addF_imm(regF dst, immF con) %{
17480 predicate(UseAVX == 0);
17481 match(Set dst (AddF dst con));
17482 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17483 ins_cost(150);
17484 ins_encode %{
17485 __ addss($dst$$XMMRegister, $constantaddress($con));
17486 %}
17487 ins_pipe(pipe_slow);
17488 %}
17489
17490 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
17491 predicate(UseAVX > 0);
17492 match(Set dst (AddF src1 src2));
17493
17494 format %{ "vaddss $dst, $src1, $src2" %}
17495 ins_cost(150);
17496 ins_encode %{
17497 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17498 %}
17499 ins_pipe(pipe_slow);
17500 %}
17501
17502 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
17503 predicate(UseAVX > 0);
17504 match(Set dst (AddF src1 (LoadF src2)));
17505
17506 format %{ "vaddss $dst, $src1, $src2" %}
17507 ins_cost(150);
17508 ins_encode %{
17509 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17510 %}
17511 ins_pipe(pipe_slow);
17512 %}
17513
17514 instruct addF_reg_imm(regF dst, regF src, immF con) %{
17515 predicate(UseAVX > 0);
17516 match(Set dst (AddF src con));
17517
17518 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17519 ins_cost(150);
17520 ins_encode %{
17521 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17522 %}
17523 ins_pipe(pipe_slow);
17524 %}
17525
17526 instruct addD_reg(regD dst, regD src) %{
17527 predicate(UseAVX == 0);
17528 match(Set dst (AddD dst src));
17529
17530 format %{ "addsd $dst, $src" %}
17531 ins_cost(150);
17532 ins_encode %{
17533 __ addsd($dst$$XMMRegister, $src$$XMMRegister);
17534 %}
17535 ins_pipe(pipe_slow);
17536 %}
17537
17538 instruct addD_mem(regD dst, memory src) %{
17539 predicate(UseAVX == 0);
17540 match(Set dst (AddD dst (LoadD src)));
17541
17542 format %{ "addsd $dst, $src" %}
17543 ins_cost(150);
17544 ins_encode %{
17545 __ addsd($dst$$XMMRegister, $src$$Address);
17546 %}
17547 ins_pipe(pipe_slow);
17548 %}
17549
17550 instruct addD_imm(regD dst, immD con) %{
17551 predicate(UseAVX == 0);
17552 match(Set dst (AddD dst con));
17553 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17554 ins_cost(150);
17555 ins_encode %{
17556 __ addsd($dst$$XMMRegister, $constantaddress($con));
17557 %}
17558 ins_pipe(pipe_slow);
17559 %}
17560
17561 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
17562 predicate(UseAVX > 0);
17563 match(Set dst (AddD src1 src2));
17564
17565 format %{ "vaddsd $dst, $src1, $src2" %}
17566 ins_cost(150);
17567 ins_encode %{
17568 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17569 %}
17570 ins_pipe(pipe_slow);
17571 %}
17572
17573 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
17574 predicate(UseAVX > 0);
17575 match(Set dst (AddD src1 (LoadD src2)));
17576
17577 format %{ "vaddsd $dst, $src1, $src2" %}
17578 ins_cost(150);
17579 ins_encode %{
17580 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17581 %}
17582 ins_pipe(pipe_slow);
17583 %}
17584
17585 instruct addD_reg_imm(regD dst, regD src, immD con) %{
17586 predicate(UseAVX > 0);
17587 match(Set dst (AddD src con));
17588
17589 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17590 ins_cost(150);
17591 ins_encode %{
17592 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17593 %}
17594 ins_pipe(pipe_slow);
17595 %}
17596
17597 instruct subF_reg(regF dst, regF src) %{
17598 predicate(UseAVX == 0);
17599 match(Set dst (SubF dst src));
17600
17601 format %{ "subss $dst, $src" %}
17602 ins_cost(150);
17603 ins_encode %{
17604 __ subss($dst$$XMMRegister, $src$$XMMRegister);
17605 %}
17606 ins_pipe(pipe_slow);
17607 %}
17608
17609 instruct subF_mem(regF dst, memory src) %{
17610 predicate(UseAVX == 0);
17611 match(Set dst (SubF dst (LoadF src)));
17612
17613 format %{ "subss $dst, $src" %}
17614 ins_cost(150);
17615 ins_encode %{
17616 __ subss($dst$$XMMRegister, $src$$Address);
17617 %}
17618 ins_pipe(pipe_slow);
17619 %}
17620
17621 instruct subF_imm(regF dst, immF con) %{
17622 predicate(UseAVX == 0);
17623 match(Set dst (SubF dst con));
17624 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17625 ins_cost(150);
17626 ins_encode %{
17627 __ subss($dst$$XMMRegister, $constantaddress($con));
17628 %}
17629 ins_pipe(pipe_slow);
17630 %}
17631
17632 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
17633 predicate(UseAVX > 0);
17634 match(Set dst (SubF src1 src2));
17635
17636 format %{ "vsubss $dst, $src1, $src2" %}
17637 ins_cost(150);
17638 ins_encode %{
17639 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17640 %}
17641 ins_pipe(pipe_slow);
17642 %}
17643
17644 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
17645 predicate(UseAVX > 0);
17646 match(Set dst (SubF src1 (LoadF src2)));
17647
17648 format %{ "vsubss $dst, $src1, $src2" %}
17649 ins_cost(150);
17650 ins_encode %{
17651 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17652 %}
17653 ins_pipe(pipe_slow);
17654 %}
17655
17656 instruct subF_reg_imm(regF dst, regF src, immF con) %{
17657 predicate(UseAVX > 0);
17658 match(Set dst (SubF src con));
17659
17660 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17661 ins_cost(150);
17662 ins_encode %{
17663 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17664 %}
17665 ins_pipe(pipe_slow);
17666 %}
17667
17668 instruct subD_reg(regD dst, regD src) %{
17669 predicate(UseAVX == 0);
17670 match(Set dst (SubD dst src));
17671
17672 format %{ "subsd $dst, $src" %}
17673 ins_cost(150);
17674 ins_encode %{
17675 __ subsd($dst$$XMMRegister, $src$$XMMRegister);
17676 %}
17677 ins_pipe(pipe_slow);
17678 %}
17679
17680 instruct subD_mem(regD dst, memory src) %{
17681 predicate(UseAVX == 0);
17682 match(Set dst (SubD dst (LoadD src)));
17683
17684 format %{ "subsd $dst, $src" %}
17685 ins_cost(150);
17686 ins_encode %{
17687 __ subsd($dst$$XMMRegister, $src$$Address);
17688 %}
17689 ins_pipe(pipe_slow);
17690 %}
17691
17692 instruct subD_imm(regD dst, immD con) %{
17693 predicate(UseAVX == 0);
17694 match(Set dst (SubD dst con));
17695 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17696 ins_cost(150);
17697 ins_encode %{
17698 __ subsd($dst$$XMMRegister, $constantaddress($con));
17699 %}
17700 ins_pipe(pipe_slow);
17701 %}
17702
17703 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
17704 predicate(UseAVX > 0);
17705 match(Set dst (SubD src1 src2));
17706
17707 format %{ "vsubsd $dst, $src1, $src2" %}
17708 ins_cost(150);
17709 ins_encode %{
17710 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17711 %}
17712 ins_pipe(pipe_slow);
17713 %}
17714
17715 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
17716 predicate(UseAVX > 0);
17717 match(Set dst (SubD src1 (LoadD src2)));
17718
17719 format %{ "vsubsd $dst, $src1, $src2" %}
17720 ins_cost(150);
17721 ins_encode %{
17722 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17723 %}
17724 ins_pipe(pipe_slow);
17725 %}
17726
17727 instruct subD_reg_imm(regD dst, regD src, immD con) %{
17728 predicate(UseAVX > 0);
17729 match(Set dst (SubD src con));
17730
17731 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17732 ins_cost(150);
17733 ins_encode %{
17734 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17735 %}
17736 ins_pipe(pipe_slow);
17737 %}
17738
17739 instruct mulF_reg(regF dst, regF src) %{
17740 predicate(UseAVX == 0);
17741 match(Set dst (MulF dst src));
17742
17743 format %{ "mulss $dst, $src" %}
17744 ins_cost(150);
17745 ins_encode %{
17746 __ mulss($dst$$XMMRegister, $src$$XMMRegister);
17747 %}
17748 ins_pipe(pipe_slow);
17749 %}
17750
17751 instruct mulF_mem(regF dst, memory src) %{
17752 predicate(UseAVX == 0);
17753 match(Set dst (MulF dst (LoadF src)));
17754
17755 format %{ "mulss $dst, $src" %}
17756 ins_cost(150);
17757 ins_encode %{
17758 __ mulss($dst$$XMMRegister, $src$$Address);
17759 %}
17760 ins_pipe(pipe_slow);
17761 %}
17762
17763 instruct mulF_imm(regF dst, immF con) %{
17764 predicate(UseAVX == 0);
17765 match(Set dst (MulF dst con));
17766 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17767 ins_cost(150);
17768 ins_encode %{
17769 __ mulss($dst$$XMMRegister, $constantaddress($con));
17770 %}
17771 ins_pipe(pipe_slow);
17772 %}
17773
17774 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
17775 predicate(UseAVX > 0);
17776 match(Set dst (MulF src1 src2));
17777
17778 format %{ "vmulss $dst, $src1, $src2" %}
17779 ins_cost(150);
17780 ins_encode %{
17781 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17782 %}
17783 ins_pipe(pipe_slow);
17784 %}
17785
17786 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
17787 predicate(UseAVX > 0);
17788 match(Set dst (MulF src1 (LoadF src2)));
17789
17790 format %{ "vmulss $dst, $src1, $src2" %}
17791 ins_cost(150);
17792 ins_encode %{
17793 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17794 %}
17795 ins_pipe(pipe_slow);
17796 %}
17797
17798 instruct mulF_reg_imm(regF dst, regF src, immF con) %{
17799 predicate(UseAVX > 0);
17800 match(Set dst (MulF src con));
17801
17802 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17803 ins_cost(150);
17804 ins_encode %{
17805 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17806 %}
17807 ins_pipe(pipe_slow);
17808 %}
17809
17810 instruct mulD_reg(regD dst, regD src) %{
17811 predicate(UseAVX == 0);
17812 match(Set dst (MulD dst src));
17813
17814 format %{ "mulsd $dst, $src" %}
17815 ins_cost(150);
17816 ins_encode %{
17817 __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
17818 %}
17819 ins_pipe(pipe_slow);
17820 %}
17821
17822 instruct mulD_mem(regD dst, memory src) %{
17823 predicate(UseAVX == 0);
17824 match(Set dst (MulD dst (LoadD src)));
17825
17826 format %{ "mulsd $dst, $src" %}
17827 ins_cost(150);
17828 ins_encode %{
17829 __ mulsd($dst$$XMMRegister, $src$$Address);
17830 %}
17831 ins_pipe(pipe_slow);
17832 %}
17833
17834 instruct mulD_imm(regD dst, immD con) %{
17835 predicate(UseAVX == 0);
17836 match(Set dst (MulD dst con));
17837 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17838 ins_cost(150);
17839 ins_encode %{
17840 __ mulsd($dst$$XMMRegister, $constantaddress($con));
17841 %}
17842 ins_pipe(pipe_slow);
17843 %}
17844
17845 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
17846 predicate(UseAVX > 0);
17847 match(Set dst (MulD src1 src2));
17848
17849 format %{ "vmulsd $dst, $src1, $src2" %}
17850 ins_cost(150);
17851 ins_encode %{
17852 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17853 %}
17854 ins_pipe(pipe_slow);
17855 %}
17856
17857 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
17858 predicate(UseAVX > 0);
17859 match(Set dst (MulD src1 (LoadD src2)));
17860
17861 format %{ "vmulsd $dst, $src1, $src2" %}
17862 ins_cost(150);
17863 ins_encode %{
17864 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17865 %}
17866 ins_pipe(pipe_slow);
17867 %}
17868
17869 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
17870 predicate(UseAVX > 0);
17871 match(Set dst (MulD src con));
17872
17873 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17874 ins_cost(150);
17875 ins_encode %{
17876 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17877 %}
17878 ins_pipe(pipe_slow);
17879 %}
17880
17881 instruct divF_reg(regF dst, regF src) %{
17882 predicate(UseAVX == 0);
17883 match(Set dst (DivF dst src));
17884
17885 format %{ "divss $dst, $src" %}
17886 ins_cost(150);
17887 ins_encode %{
17888 __ divss($dst$$XMMRegister, $src$$XMMRegister);
17889 %}
17890 ins_pipe(pipe_slow);
17891 %}
17892
17893 instruct divF_mem(regF dst, memory src) %{
17894 predicate(UseAVX == 0);
17895 match(Set dst (DivF dst (LoadF src)));
17896
17897 format %{ "divss $dst, $src" %}
17898 ins_cost(150);
17899 ins_encode %{
17900 __ divss($dst$$XMMRegister, $src$$Address);
17901 %}
17902 ins_pipe(pipe_slow);
17903 %}
17904
17905 instruct divF_imm(regF dst, immF con) %{
17906 predicate(UseAVX == 0);
17907 match(Set dst (DivF dst con));
17908 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17909 ins_cost(150);
17910 ins_encode %{
17911 __ divss($dst$$XMMRegister, $constantaddress($con));
17912 %}
17913 ins_pipe(pipe_slow);
17914 %}
17915
17916 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
17917 predicate(UseAVX > 0);
17918 match(Set dst (DivF src1 src2));
17919
17920 format %{ "vdivss $dst, $src1, $src2" %}
17921 ins_cost(150);
17922 ins_encode %{
17923 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17924 %}
17925 ins_pipe(pipe_slow);
17926 %}
17927
17928 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
17929 predicate(UseAVX > 0);
17930 match(Set dst (DivF src1 (LoadF src2)));
17931
17932 format %{ "vdivss $dst, $src1, $src2" %}
17933 ins_cost(150);
17934 ins_encode %{
17935 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17936 %}
17937 ins_pipe(pipe_slow);
17938 %}
17939
17940 instruct divF_reg_imm(regF dst, regF src, immF con) %{
17941 predicate(UseAVX > 0);
17942 match(Set dst (DivF src con));
17943
17944 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17945 ins_cost(150);
17946 ins_encode %{
17947 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17948 %}
17949 ins_pipe(pipe_slow);
17950 %}
17951
17952 instruct divD_reg(regD dst, regD src) %{
17953 predicate(UseAVX == 0);
17954 match(Set dst (DivD dst src));
17955
17956 format %{ "divsd $dst, $src" %}
17957 ins_cost(150);
17958 ins_encode %{
17959 __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17960 %}
17961 ins_pipe(pipe_slow);
17962 %}
17963
17964 instruct divD_mem(regD dst, memory src) %{
17965 predicate(UseAVX == 0);
17966 match(Set dst (DivD dst (LoadD src)));
17967
17968 format %{ "divsd $dst, $src" %}
17969 ins_cost(150);
17970 ins_encode %{
17971 __ divsd($dst$$XMMRegister, $src$$Address);
17972 %}
17973 ins_pipe(pipe_slow);
17974 %}
17975
17976 instruct divD_imm(regD dst, immD con) %{
17977 predicate(UseAVX == 0);
17978 match(Set dst (DivD dst con));
17979 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17980 ins_cost(150);
17981 ins_encode %{
17982 __ divsd($dst$$XMMRegister, $constantaddress($con));
17983 %}
17984 ins_pipe(pipe_slow);
17985 %}
17986
17987 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
17988 predicate(UseAVX > 0);
17989 match(Set dst (DivD src1 src2));
17990
17991 format %{ "vdivsd $dst, $src1, $src2" %}
17992 ins_cost(150);
17993 ins_encode %{
17994 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17995 %}
17996 ins_pipe(pipe_slow);
17997 %}
17998
17999 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
18000 predicate(UseAVX > 0);
18001 match(Set dst (DivD src1 (LoadD src2)));
18002
18003 format %{ "vdivsd $dst, $src1, $src2" %}
18004 ins_cost(150);
18005 ins_encode %{
18006 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
18007 %}
18008 ins_pipe(pipe_slow);
18009 %}
18010
18011 instruct divD_reg_imm(regD dst, regD src, immD con) %{
18012 predicate(UseAVX > 0);
18013 match(Set dst (DivD src con));
18014
18015 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
18016 ins_cost(150);
18017 ins_encode %{
18018 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
18019 %}
18020 ins_pipe(pipe_slow);
18021 %}
18022
18023 instruct absF_reg(regF dst) %{
18024 predicate(UseAVX == 0);
18025 match(Set dst (AbsF dst));
18026 ins_cost(150);
18027 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
18028 ins_encode %{
18029 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
18030 %}
18031 ins_pipe(pipe_slow);
18032 %}
18033
18034 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
18035 predicate(UseAVX > 0);
18036 match(Set dst (AbsF src));
18037 ins_cost(150);
18038 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
18039 ins_encode %{
18040 int vlen_enc = Assembler::AVX_128bit;
18041 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
18042 ExternalAddress(float_signmask()), vlen_enc);
18043 %}
18044 ins_pipe(pipe_slow);
18045 %}
18046
18047 instruct absD_reg(regD dst) %{
18048 predicate(UseAVX == 0);
18049 match(Set dst (AbsD dst));
18050 ins_cost(150);
18051 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
18052 "# abs double by sign masking" %}
18053 ins_encode %{
18054 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
18055 %}
18056 ins_pipe(pipe_slow);
18057 %}
18058
18059 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
18060 predicate(UseAVX > 0);
18061 match(Set dst (AbsD src));
18062 ins_cost(150);
18063 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
18064 "# abs double by sign masking" %}
18065 ins_encode %{
18066 int vlen_enc = Assembler::AVX_128bit;
18067 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
18068 ExternalAddress(double_signmask()), vlen_enc);
18069 %}
18070 ins_pipe(pipe_slow);
18071 %}
18072
18073 instruct negF_reg(regF dst) %{
18074 predicate(UseAVX == 0);
18075 match(Set dst (NegF dst));
18076 ins_cost(150);
18077 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
18078 ins_encode %{
18079 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
18080 %}
18081 ins_pipe(pipe_slow);
18082 %}
18083
18084 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
18085 predicate(UseAVX > 0);
18086 match(Set dst (NegF src));
18087 ins_cost(150);
18088 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
18089 ins_encode %{
18090 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
18091 ExternalAddress(float_signflip()));
18092 %}
18093 ins_pipe(pipe_slow);
18094 %}
18095
18096 instruct negD_reg(regD dst) %{
18097 predicate(UseAVX == 0);
18098 match(Set dst (NegD dst));
18099 ins_cost(150);
18100 format %{ "xorpd $dst, [0x8000000000000000]\t"
18101 "# neg double by sign flipping" %}
18102 ins_encode %{
18103 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
18104 %}
18105 ins_pipe(pipe_slow);
18106 %}
18107
18108 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
18109 predicate(UseAVX > 0);
18110 match(Set dst (NegD src));
18111 ins_cost(150);
18112 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
18113 "# neg double by sign flipping" %}
18114 ins_encode %{
18115 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
18116 ExternalAddress(double_signflip()));
18117 %}
18118 ins_pipe(pipe_slow);
18119 %}
18120
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
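// (The reason, as a note: sqrtss writes only the low 32 bits of dst and leaves
// the remaining bits untouched, so a dst distinct from src would carry a false
// dependency on dst's previous contents. The same applies to sqrtsd below.)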
18123 instruct sqrtF_reg(regF dst) %{
18124 match(Set dst (SqrtF dst));
18125 format %{ "sqrtss $dst, $dst" %}
18126 ins_encode %{
18127 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
18128 %}
18129 ins_pipe(pipe_slow);
18130 %}
18131
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
18134 instruct sqrtD_reg(regD dst) %{
18135 match(Set dst (SqrtD dst));
18136 format %{ "sqrtsd $dst, $dst" %}
18137 ins_encode %{
18138 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
18139 %}
18140 ins_pipe(pipe_slow);
18141 %}
18142
18143 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
18144 effect(TEMP tmp);
18145 match(Set dst (ConvF2HF src));
18146 ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
18148 ins_encode %{
18149 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
18150 %}
18151 ins_pipe( pipe_slow );
18152 %}
18153
18154 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
18155 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
18156 effect(TEMP ktmp, TEMP rtmp);
18157 match(Set mem (StoreC mem (ConvF2HF src)));
18158 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
18159 ins_encode %{
18160 __ movl($rtmp$$Register, 0x1);
18161 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
18162 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
18163 %}
18164 ins_pipe( pipe_slow );
18165 %}
18166
18167 instruct vconvF2HF(vec dst, vec src) %{
18168 match(Set dst (VectorCastF2HF src));
18169 format %{ "vector_conv_F2HF $dst $src" %}
18170 ins_encode %{
18171 int vlen_enc = vector_length_encoding(this, $src);
18172 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
18173 %}
18174 ins_pipe( pipe_slow );
18175 %}
18176
18177 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
18178 predicate(n->as_StoreVector()->memory_size() >= 16);
18179 match(Set mem (StoreVector mem (VectorCastF2HF src)));
18180 format %{ "vcvtps2ph $mem,$src" %}
18181 ins_encode %{
18182 int vlen_enc = vector_length_encoding(this, $src);
18183 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
18184 %}
18185 ins_pipe( pipe_slow );
18186 %}
18187
18188 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
18189 match(Set dst (ConvHF2F src));
18190 format %{ "vcvtph2ps $dst,$src" %}
18191 ins_encode %{
18192 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
18193 %}
18194 ins_pipe( pipe_slow );
18195 %}
18196
18197 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
18198 match(Set dst (VectorCastHF2F (LoadVector mem)));
18199 format %{ "vcvtph2ps $dst,$mem" %}
18200 ins_encode %{
18201 int vlen_enc = vector_length_encoding(this);
18202 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
18203 %}
18204 ins_pipe( pipe_slow );
18205 %}
18206
18207 instruct vconvHF2F(vec dst, vec src) %{
18208 match(Set dst (VectorCastHF2F src));
18209 ins_cost(125);
18210 format %{ "vector_conv_HF2F $dst,$src" %}
18211 ins_encode %{
18212 int vlen_enc = vector_length_encoding(this);
18213 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
18214 %}
18215 ins_pipe( pipe_slow );
18216 %}
18217
18218 // ---------------------------------------- VectorReinterpret ------------------------------------
18219 instruct reinterpret_mask(kReg dst) %{
18220 predicate(n->bottom_type()->isa_vectmask() &&
18221 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
18222 match(Set dst (VectorReinterpret dst));
18223 ins_cost(125);
18224 format %{ "vector_reinterpret $dst\t!" %}
18225 ins_encode %{
18226 // empty
18227 %}
18228 ins_pipe( pipe_slow );
18229 %}
18230
18231 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
18232 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18233 n->bottom_type()->isa_vectmask() &&
18234 n->in(1)->bottom_type()->isa_vectmask() &&
18235 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18237 match(Set dst (VectorReinterpret src));
18238 effect(TEMP xtmp);
18239 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
18240 ins_encode %{
18241 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
18242 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18243 assert(src_sz == dst_sz , "src and dst size mismatch");
18244 int vlen_enc = vector_length_encoding(src_sz);
18245 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18246 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18247 %}
18248 ins_pipe( pipe_slow );
18249 %}
18250
18251 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
18252 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18253 n->bottom_type()->isa_vectmask() &&
18254 n->in(1)->bottom_type()->isa_vectmask() &&
18255 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
18256 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18258 match(Set dst (VectorReinterpret src));
18259 effect(TEMP xtmp);
18260 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
18261 ins_encode %{
18262 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
18263 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18264 assert(src_sz == dst_sz , "src and dst size mismatch");
18265 int vlen_enc = vector_length_encoding(src_sz);
18266 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18267 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18268 %}
18269 ins_pipe( pipe_slow );
18270 %}
18271
18272 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
18273 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
18274 n->bottom_type()->isa_vectmask() &&
18275 n->in(1)->bottom_type()->isa_vectmask() &&
18276 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
18277 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src have the same size in bytes
18279 match(Set dst (VectorReinterpret src));
18280 effect(TEMP xtmp);
18281 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
18282 ins_encode %{
18283 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
18284 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
18285 assert(src_sz == dst_sz , "src and dst size mismatch");
18286 int vlen_enc = vector_length_encoding(src_sz);
18287 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
18288 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
18289 %}
18290 ins_pipe( pipe_slow );
18291 %}
18292
18293 instruct reinterpret(vec dst) %{
18294 predicate(!n->bottom_type()->isa_vectmask() &&
18295 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
18296 match(Set dst (VectorReinterpret dst));
18297 ins_cost(125);
18298 format %{ "vector_reinterpret $dst\t!" %}
18299 ins_encode %{
18300 // empty
18301 %}
18302 ins_pipe( pipe_slow );
18303 %}
18304
18305 instruct reinterpret_expand(vec dst, vec src) %{
18306 predicate(UseAVX == 0 &&
18307 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18308 match(Set dst (VectorReinterpret src));
18309 ins_cost(125);
18310 effect(TEMP dst);
18311 format %{ "vector_reinterpret_expand $dst,$src" %}
18312 ins_encode %{
18313 assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
18314 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");
18315
18316 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
18317 if (src_vlen_in_bytes == 4) {
18318 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
18319 } else {
18320 assert(src_vlen_in_bytes == 8, "");
18321 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
18322 }
18323 __ pand($dst$$XMMRegister, $src$$XMMRegister);
18324 %}
18325 ins_pipe( pipe_slow );
18326 %}
18327
18328 instruct vreinterpret_expand4(legVec dst, vec src) %{
18329 predicate(UseAVX > 0 &&
18330 !n->bottom_type()->isa_vectmask() &&
18331 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
18332 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18333 match(Set dst (VectorReinterpret src));
18334 ins_cost(125);
18335 format %{ "vector_reinterpret_expand $dst,$src" %}
18336 ins_encode %{
18337 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
18338 %}
18339 ins_pipe( pipe_slow );
18340 %}
18341
18342
18343 instruct vreinterpret_expand(legVec dst, vec src) %{
18344 predicate(UseAVX > 0 &&
18345 !n->bottom_type()->isa_vectmask() &&
18346 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
18347 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
18348 match(Set dst (VectorReinterpret src));
18349 ins_cost(125);
18350 format %{ "vector_reinterpret_expand $dst,$src\t!" %}
18351 ins_encode %{
18352 switch (Matcher::vector_length_in_bytes(this, $src)) {
18353 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18354 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18355 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18356 default: ShouldNotReachHere();
18357 }
18358 %}
18359 ins_pipe( pipe_slow );
18360 %}
18361
18362 instruct reinterpret_shrink(vec dst, legVec src) %{
18363 predicate(!n->bottom_type()->isa_vectmask() &&
18364 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
18365 match(Set dst (VectorReinterpret src));
18366 ins_cost(125);
18367 format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
18368 ins_encode %{
18369 switch (Matcher::vector_length_in_bytes(this)) {
18370 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
18371 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
18372 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
18373 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
18374 default: ShouldNotReachHere();
18375 }
18376 %}
18377 ins_pipe( pipe_slow );
18378 %}
18379
18380 // ----------------------------------------------------------------------------------------------------
18381
18382 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
18383 match(Set dst (RoundDoubleMode src rmode));
18384 format %{ "roundsd $dst,$src" %}
18385 ins_cost(150);
18386 ins_encode %{
18387 assert(UseSSE >= 4, "required");
18388 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
18389 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18390 }
18391 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
18392 %}
18393 ins_pipe(pipe_slow);
18394 %}
18395
18396 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
18397 match(Set dst (RoundDoubleMode con rmode));
18398 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
18399 ins_cost(150);
18400 ins_encode %{
18401 assert(UseSSE >= 4, "required");
18402 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
18403 %}
18404 ins_pipe(pipe_slow);
18405 %}
18406
18407 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
18408 predicate(Matcher::vector_length(n) < 8);
18409 match(Set dst (RoundDoubleModeV src rmode));
18410 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
18411 ins_encode %{
18412 assert(UseAVX > 0, "required");
18413 int vlen_enc = vector_length_encoding(this);
18414 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
18415 %}
18416 ins_pipe( pipe_slow );
18417 %}
18418
18419 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
18420 predicate(Matcher::vector_length(n) == 8);
18421 match(Set dst (RoundDoubleModeV src rmode));
18422 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
18423 ins_encode %{
18424 assert(UseAVX > 2, "required");
18425 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
18426 %}
18427 ins_pipe( pipe_slow );
18428 %}
18429
18430 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
18431 predicate(Matcher::vector_length(n) < 8);
18432 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18433 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
18434 ins_encode %{
18435 assert(UseAVX > 0, "required");
18436 int vlen_enc = vector_length_encoding(this);
18437 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
18438 %}
18439 ins_pipe( pipe_slow );
18440 %}
18441
18442 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
18443 predicate(Matcher::vector_length(n) == 8);
18444 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
18445 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
18446 ins_encode %{
18447 assert(UseAVX > 2, "required");
18448 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
18449 %}
18450 ins_pipe( pipe_slow );
18451 %}
18452
18453 instruct onspinwait() %{
18454 match(OnSpinWait);
18455 ins_cost(200);
18456
18457 format %{
18458 $$template
18459 $$emit$$"pause\t! membar_onspinwait"
18460 %}
18461 ins_encode %{
18462 __ pause();
18463 %}
18464 ins_pipe(pipe_slow);
18465 %}
18466
18467 // a * b + c
18468 instruct fmaD_reg(regD a, regD b, regD c) %{
18469 match(Set c (FmaD c (Binary a b)));
18470 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
18471 ins_cost(150);
18472 ins_encode %{
18473 assert(UseFMA, "Needs FMA instructions support.");
18474 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18475 %}
18476 ins_pipe( pipe_slow );
18477 %}
18478
18479 // a * b + c
18480 instruct fmaF_reg(regF a, regF b, regF c) %{
18481 match(Set c (FmaF c (Binary a b)));
18482 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
18483 ins_cost(150);
18484 ins_encode %{
18485 assert(UseFMA, "Needs FMA instructions support.");
18486 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
18487 %}
18488 ins_pipe( pipe_slow );
18489 %}
18490
18491 // ====================VECTOR INSTRUCTIONS=====================================
18492
18493 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
18494 instruct MoveVec2Leg(legVec dst, vec src) %{
18495 match(Set dst src);
18496 format %{ "" %}
18497 ins_encode %{
18498 ShouldNotReachHere();
18499 %}
18500 ins_pipe( fpu_reg_reg );
18501 %}
18502
18503 instruct MoveLeg2Vec(vec dst, legVec src) %{
18504 match(Set dst src);
18505 format %{ "" %}
18506 ins_encode %{
18507 ShouldNotReachHere();
18508 %}
18509 ins_pipe( fpu_reg_reg );
18510 %}
18511
18512 // ============================================================================
18513
18514 // Load vectors generic operand pattern
18515 instruct loadV(vec dst, memory mem) %{
18516 match(Set dst (LoadVector mem));
18517 ins_cost(125);
18518 format %{ "load_vector $dst,$mem" %}
18519 ins_encode %{
18520 BasicType bt = Matcher::vector_element_basic_type(this);
18521 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
18522 %}
18523 ins_pipe( pipe_slow );
18524 %}
18525
18526 // Store vectors generic operand pattern.
18527 instruct storeV(memory mem, vec src) %{
18528 match(Set mem (StoreVector mem src));
18529 ins_cost(145);
18530 format %{ "store_vector $mem,$src\n\t" %}
18531 ins_encode %{
18532 switch (Matcher::vector_length_in_bytes(this, $src)) {
18533 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break;
18534 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break;
18535 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break;
18536 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break;
18537 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
18538 default: ShouldNotReachHere();
18539 }
18540 %}
18541 ins_pipe( pipe_slow );
18542 %}
18543
18544 // ---------------------------------------- Gather ------------------------------------
18545
18546 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
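//
// Semantically, a gather performs dst[i] = base[idx[i]] for every lane: a
// vector of loads from non-contiguous addresses driven by an index vector (or,
// for the subword variants, by an index array pointed to by idx_base).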
18547
18548 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
18549 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
18550 Matcher::vector_length_in_bytes(n) <= 32);
18551 match(Set dst (LoadVectorGather mem idx));
18552 effect(TEMP dst, TEMP tmp, TEMP mask);
18553 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
18554 ins_encode %{
18555 int vlen_enc = vector_length_encoding(this);
18556 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18557 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18558 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18559 __ lea($tmp$$Register, $mem$$Address);
18560 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18561 %}
18562 ins_pipe( pipe_slow );
18563 %}
18564
18565
18566 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
18567 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18568 !is_subword_type(Matcher::vector_element_basic_type(n)));
18569 match(Set dst (LoadVectorGather mem idx));
18570 effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18572 ins_encode %{
18573 int vlen_enc = vector_length_encoding(this);
18574 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18575 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
18576 __ lea($tmp$$Register, $mem$$Address);
18577 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18578 %}
18579 ins_pipe( pipe_slow );
18580 %}
18581
18582 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18583 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18584 !is_subword_type(Matcher::vector_element_basic_type(n)));
18585 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18586 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18588 ins_encode %{
18589 assert(UseAVX > 2, "sanity");
18590 int vlen_enc = vector_length_encoding(this);
18591 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18592 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is first copied to a temporary.
18595 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18596 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18597 __ lea($tmp$$Register, $mem$$Address);
18598 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18599 %}
18600 ins_pipe( pipe_slow );
18601 %}
18602
18603 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
18604 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18605 match(Set dst (LoadVectorGather mem idx_base));
18606 effect(TEMP tmp, TEMP rtmp);
18607 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18608 ins_encode %{
18609 int vlen_enc = vector_length_encoding(this);
18610 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18611 __ lea($tmp$$Register, $mem$$Address);
18612 __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18613 %}
18614 ins_pipe( pipe_slow );
18615 %}
18616
18617 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18618 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
18619 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18620 match(Set dst (LoadVectorGather mem idx_base));
18621 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18622 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18623 ins_encode %{
18624 int vlen_enc = vector_length_encoding(this);
18625 int vector_len = Matcher::vector_length(this);
18626 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18627 __ lea($tmp$$Register, $mem$$Address);
18628 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18629 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18630 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18631 %}
18632 ins_pipe( pipe_slow );
18633 %}
18634
18635 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18636 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18637 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18638 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18639 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18640 ins_encode %{
18641 int vlen_enc = vector_length_encoding(this);
18642 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18643 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18644 __ lea($tmp$$Register, $mem$$Address);
18645 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18646 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18647 %}
18648 ins_pipe( pipe_slow );
18649 %}
18650
18651 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18652 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18653 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18654 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18655 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18656 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18657 ins_encode %{
18658 int vlen_enc = vector_length_encoding(this);
18659 int vector_len = Matcher::vector_length(this);
18660 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18661 __ xorq($mask_idx$$Register, $mask_idx$$Register);
18662 __ lea($tmp$$Register, $mem$$Address);
18663 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18664 __ kmovql($rtmp2$$Register, $mask$$KRegister);
18665 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18666 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18667 %}
18668 ins_pipe( pipe_slow );
18669 %}
18670
18671 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18672 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18673 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18674 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18675 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18676 ins_encode %{
18677 int vlen_enc = vector_length_encoding(this);
18678 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18679 __ lea($tmp$$Register, $mem$$Address);
18680 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18681 if (elem_bt == T_SHORT) {
18682 __ movl($mask_idx$$Register, 0x55555555);
18683 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18684 }
18685 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18686 __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18687 %}
18688 ins_pipe( pipe_slow );
18689 %}
18690
18691 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18692 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18693 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18694 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18695 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18696 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18697 ins_encode %{
18698 int vlen_enc = vector_length_encoding(this);
18699 int vector_len = Matcher::vector_length(this);
18700 BasicType elem_bt = Matcher::vector_element_basic_type(this);
18701 __ lea($tmp$$Register, $mem$$Address);
18702 __ movptr($idx_base_temp$$Register, $idx_base$$Register);
18703 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
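    // As in the LE8B rule above, compress the byte-granular vpmovmskb result
    // to one bit per short element.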
18704 if (elem_bt == T_SHORT) {
18705 __ movl($mask_idx$$Register, 0x55555555);
18706 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18707 }
18708 __ xorl($mask_idx$$Register, $mask_idx$$Register);
18709 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18710 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18711 %}
18712 ins_pipe( pipe_slow );
18713 %}
18714
18715 // ====================Scatter=======================================
18716
18717 // Scatter INT, LONG, FLOAT, DOUBLE
18718
18719 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18720 predicate(UseAVX > 2);
18721 match(Set mem (StoreVectorScatter mem (Binary src idx)));
18722 effect(TEMP tmp, TEMP ktmp);
18723 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18724 ins_encode %{
18725 int vlen_enc = vector_length_encoding(this, $src);
18726 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18727
18728 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18729 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18730
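    // AVX-512 scatters are always predicated and clear opmask bits as elements
    // are stored, so materialize a fresh all-ones mask in $ktmp for the
    // unmasked case.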
18731 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18732 __ lea($tmp$$Register, $mem$$Address);
18733 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18734 %}
18735 ins_pipe( pipe_slow );
18736 %}
18737
18738 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18739 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18740 effect(TEMP tmp, TEMP ktmp);
18741 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18742 ins_encode %{
18743 int vlen_enc = vector_length_encoding(this, $src);
18744 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18745 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18746 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18747 // Note: The scatter instruction partially updates the opmask register used
18748 // for predication, hence the mask operand is copied to a temporary.
18749 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18750 __ lea($tmp$$Register, $mem$$Address);
18751 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18752 %}
18753 ins_pipe( pipe_slow );
18754 %}
18755
18756 // ====================REPLICATE=======================================
18757
18758 // Replicate byte scalar to be vector
18759 instruct vReplB_reg(vec dst, rRegI src) %{
18760 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18761 match(Set dst (Replicate src));
18762 format %{ "replicateB $dst,$src" %}
18763 ins_encode %{
18764 uint vlen = Matcher::vector_length(this);
18765 if (UseAVX >= 2) {
18766 int vlen_enc = vector_length_encoding(this);
18767 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18768 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18769 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
18770 } else {
18771 __ movdl($dst$$XMMRegister, $src$$Register);
18772 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18773 }
18774 } else {
18775 assert(UseAVX < 2, "");
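      // Pre-AVX2 fallback: duplicate the byte into both bytes of a word,
      // broadcast that word across the low 8 bytes, then mirror into the
      // upper 8 bytes for 16-byte vectors.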
18776 __ movdl($dst$$XMMRegister, $src$$Register);
18777 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18778 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18779 if (vlen >= 16) {
18780 assert(vlen == 16, "");
18781 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18782 }
18783 }
18784 %}
18785 ins_pipe( pipe_slow );
18786 %}
18787
18788 instruct ReplB_mem(vec dst, memory mem) %{
18789 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18790 match(Set dst (Replicate (LoadB mem)));
18791 format %{ "replicateB $dst,$mem" %}
18792 ins_encode %{
18793 int vlen_enc = vector_length_encoding(this);
18794 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
18795 %}
18796 ins_pipe( pipe_slow );
18797 %}
18798
18799 // ====================ReplicateS=======================================
18800
18801 instruct vReplS_reg(vec dst, rRegI src) %{
18802 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18803 match(Set dst (Replicate src));
18804 format %{ "replicateS $dst,$src" %}
18805 ins_encode %{
18806 uint vlen = Matcher::vector_length(this);
18807 int vlen_enc = vector_length_encoding(this);
18808 if (UseAVX >= 2) {
18809 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18810 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18811 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
18812 } else {
18813 __ movdl($dst$$XMMRegister, $src$$Register);
18814 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18815 }
18816 } else {
18817 assert(UseAVX < 2, "");
18818 __ movdl($dst$$XMMRegister, $src$$Register);
18819 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18820 if (vlen >= 8) {
18821 assert(vlen == 8, "");
18822 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18823 }
18824 }
18825 %}
18826 ins_pipe( pipe_slow );
18827 %}
18828
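// Replicate half-float (FP16) scalar or immediate. Both forms require
// AVX512_FP16 and broadcast the 16-bit value from a GPR with evpbroadcastw.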
18829 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18830 match(Set dst (Replicate con));
18831 effect(TEMP rtmp);
18832 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18833 ins_encode %{
18834 int vlen_enc = vector_length_encoding(this);
18835 BasicType bt = Matcher::vector_element_basic_type(this);
18836 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18837 __ movl($rtmp$$Register, $con$$constant);
18838 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18839 %}
18840 ins_pipe( pipe_slow );
18841 %}
18842
18843 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18844 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18845 match(Set dst (Replicate src));
18846 effect(TEMP rtmp);
18847 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18848 ins_encode %{
18849 int vlen_enc = vector_length_encoding(this);
18850 __ vmovw($rtmp$$Register, $src$$XMMRegister);
18851 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18852 %}
18853 ins_pipe( pipe_slow );
18854 %}
18855
18856 instruct ReplS_mem(vec dst, memory mem) %{
18857 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18858 match(Set dst (Replicate (LoadS mem)));
18859 format %{ "replicateS $dst,$mem" %}
18860 ins_encode %{
18861 int vlen_enc = vector_length_encoding(this);
18862 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
18863 %}
18864 ins_pipe( pipe_slow );
18865 %}
18866
18867 // ====================ReplicateI=======================================
18868
18869 instruct ReplI_reg(vec dst, rRegI src) %{
18870 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18871 match(Set dst (Replicate src));
18872 format %{ "replicateI $dst,$src" %}
18873 ins_encode %{
18874 uint vlen = Matcher::vector_length(this);
18875 int vlen_enc = vector_length_encoding(this);
18876 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18877 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
18878 } else if (VM_Version::supports_avx2()) {
18879 __ movdl($dst$$XMMRegister, $src$$Register);
18880 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18881 } else {
18882 __ movdl($dst$$XMMRegister, $src$$Register);
18883 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18884 }
18885 %}
18886 ins_pipe( pipe_slow );
18887 %}
18888
18889 instruct ReplI_mem(vec dst, memory mem) %{
18890 predicate(Matcher::vector_element_basic_type(n) == T_INT);
18891 match(Set dst (Replicate (LoadI mem)));
18892 format %{ "replicateI $dst,$mem" %}
18893 ins_encode %{
18894 int vlen_enc = vector_length_encoding(this);
18895 if (VM_Version::supports_avx2()) {
18896 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
18897 } else if (VM_Version::supports_avx()) {
18898 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
18899 } else {
18900 __ movdl($dst$$XMMRegister, $mem$$Address);
18901 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18902 }
18903 %}
18904 ins_pipe( pipe_slow );
18905 %}
18906
18907 instruct ReplI_imm(vec dst, immI con) %{
18908 predicate(Matcher::is_non_long_integral_vector(n));
18909 match(Set dst (Replicate con));
18910 format %{ "replicateI $dst,$con" %}
18911 ins_encode %{
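    // The constant table entry holds only 4 bytes of element copies when AVX
    // is available, 8 bytes with SSE3 (movddup), and a full 16 bytes
    // otherwise; load_constant_vector only needs that much data in the pool.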
18912 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18913 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18914 type2aelembytes(Matcher::vector_element_basic_type(this))));
18915 BasicType bt = Matcher::vector_element_basic_type(this);
18916 int vlen = Matcher::vector_length_in_bytes(this);
18917 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18918 %}
18919 ins_pipe( pipe_slow );
18920 %}
18921
18922 // Replicate scalar zero to be vector
18923 instruct ReplI_zero(vec dst, immI_0 zero) %{
18924 predicate(Matcher::is_non_long_integral_vector(n));
18925 match(Set dst (Replicate zero));
18926 format %{ "replicateI $dst,$zero" %}
18927 ins_encode %{
18928 int vlen_enc = vector_length_encoding(this);
18929 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
18930 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18931 } else {
18932 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18933 }
18934 %}
18935 ins_pipe( fpu_reg_reg );
18936 %}
18937
18938 instruct ReplI_M1(vec dst, immI_M1 con) %{
18939 predicate(Matcher::is_non_long_integral_vector(n));
18940 match(Set dst (Replicate con));
18941 format %{ "vallones $dst" %}
18942 ins_encode %{
18943 int vector_len = vector_length_encoding(this);
18944 __ vallones($dst$$XMMRegister, vector_len);
18945 %}
18946 ins_pipe( pipe_slow );
18947 %}
18948
18949 // ====================ReplicateL=======================================
18950
18951 // Replicate long (8-byte) scalar to be vector
18952 instruct ReplL_reg(vec dst, rRegL src) %{
18953 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18954 match(Set dst (Replicate src));
18955 format %{ "replicateL $dst,$src" %}
18956 ins_encode %{
18957 int vlen = Matcher::vector_length(this);
18958 int vlen_enc = vector_length_encoding(this);
18959 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18960 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
18961 } else if (VM_Version::supports_avx2()) {
18962 __ movdq($dst$$XMMRegister, $src$$Register);
18963 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
18964 } else {
18965 __ movdq($dst$$XMMRegister, $src$$Register);
18966 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18967 }
18968 %}
18969 ins_pipe( pipe_slow );
18970 %}
18971
18972 instruct ReplL_mem(vec dst, memory mem) %{
18973 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18974 match(Set dst (Replicate (LoadL mem)));
18975 format %{ "replicateL $dst,$mem" %}
18976 ins_encode %{
18977 int vlen_enc = vector_length_encoding(this);
18978 if (VM_Version::supports_avx2()) {
18979 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
18980 } else if (VM_Version::supports_sse3()) {
18981 __ movddup($dst$$XMMRegister, $mem$$Address);
18982 } else {
18983 __ movq($dst$$XMMRegister, $mem$$Address);
18984 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18985 }
18986 %}
18987 ins_pipe( pipe_slow );
18988 %}
18989
18990 // Replicate long (8-byte) scalar immediate to be vector by loading from const table.
18991 instruct ReplL_imm(vec dst, immL con) %{
18992 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18993 match(Set dst (Replicate con));
18994 format %{ "replicateL $dst,$con" %}
18995 ins_encode %{
18996 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
18997 int vlen = Matcher::vector_length_in_bytes(this);
18998 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18999 %}
19000 ins_pipe( pipe_slow );
19001 %}
19002
19003 instruct ReplL_zero(vec dst, immL0 zero) %{
19004 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19005 match(Set dst (Replicate zero));
19006 format %{ "replicateL $dst,$zero" %}
19007 ins_encode %{
19008 int vlen_enc = vector_length_encoding(this);
19009 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
19010 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19011 } else {
19012 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
19013 }
19014 %}
19015 ins_pipe( fpu_reg_reg );
19016 %}
19017
19018 instruct ReplL_M1(vec dst, immL_M1 con) %{
19019 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
19020 match(Set dst (Replicate con));
19021 format %{ "vallones $dst" %}
19022 ins_encode %{
19023 int vector_len = vector_length_encoding(this);
19024 __ vallones($dst$$XMMRegister, vector_len);
19025 %}
19026 ins_pipe( pipe_slow );
19027 %}
19028
19029 // ====================ReplicateF=======================================
19030
19031 instruct vReplF_reg(vec dst, vlRegF src) %{
19032 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19033 match(Set dst (Replicate src));
19034 format %{ "replicateF $dst,$src" %}
19035 ins_encode %{
19036 uint vlen = Matcher::vector_length(this);
19037 int vlen_enc = vector_length_encoding(this);
19038 if (vlen <= 4) {
19039 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19040 } else if (VM_Version::supports_avx2()) {
19041 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19042 } else {
19043 assert(vlen == 8, "sanity");
19044 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
19045 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19046 }
19047 %}
19048 ins_pipe( pipe_slow );
19049 %}
19050
19051 instruct ReplF_reg(vec dst, vlRegF src) %{
19052 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19053 match(Set dst (Replicate src));
19054 format %{ "replicateF $dst,$src" %}
19055 ins_encode %{
19056 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
19057 %}
19058 ins_pipe( pipe_slow );
19059 %}
19060
19061 instruct ReplF_mem(vec dst, memory mem) %{
19062 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
19063 match(Set dst (Replicate (LoadF mem)));
19064 format %{ "replicateF $dst,$mem" %}
19065 ins_encode %{
19066 int vlen_enc = vector_length_encoding(this);
19067 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
19068 %}
19069 ins_pipe( pipe_slow );
19070 %}
19071
19072 // Replicate float scalar immediate to be vector by loading from const table.
19073 instruct ReplF_imm(vec dst, immF con) %{
19074 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19075 match(Set dst (Replicate con));
19076 format %{ "replicateF $dst,$con" %}
19077 ins_encode %{
19078 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
19079 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
19080 int vlen = Matcher::vector_length_in_bytes(this);
19081 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
19082 %}
19083 ins_pipe( pipe_slow );
19084 %}
19085
19086 instruct ReplF_zero(vec dst, immF0 zero) %{
19087 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
19088 match(Set dst (Replicate zero));
19089 format %{ "replicateF $dst,$zero" %}
19090 ins_encode %{
19091 int vlen_enc = vector_length_encoding(this);
19092 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19093 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19094 } else {
19095 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19096 }
19097 %}
19098 ins_pipe( fpu_reg_reg );
19099 %}
19100
19101 // ====================ReplicateD=======================================
19102
19103 // Replicate double (8-byte) scalar to be vector
19104 instruct vReplD_reg(vec dst, vlRegD src) %{
19105 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19106 match(Set dst (Replicate src));
19107 format %{ "replicateD $dst,$src" %}
19108 ins_encode %{
19109 uint vlen = Matcher::vector_length(this);
19110 int vlen_enc = vector_length_encoding(this);
19111 if (vlen <= 2) {
19112 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19113 } else if (VM_Version::supports_avx2()) {
19114 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
19115 } else {
19116 assert(vlen == 4, "sanity");
19117 __ movddup($dst$$XMMRegister, $src$$XMMRegister);
19118 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
19119 }
19120 %}
19121 ins_pipe( pipe_slow );
19122 %}
19123
19124 instruct ReplD_reg(vec dst, vlRegD src) %{
19125 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19126 match(Set dst (Replicate src));
19127 format %{ "replicateD $dst,$src" %}
19128 ins_encode %{
19129 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
19130 %}
19131 ins_pipe( pipe_slow );
19132 %}
19133
19134 instruct ReplD_mem(vec dst, memory mem) %{
19135 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
19136 match(Set dst (Replicate (LoadD mem)));
19137 format %{ "replicateD $dst,$mem" %}
19138 ins_encode %{
19139 if (Matcher::vector_length(this) >= 4) {
19140 int vlen_enc = vector_length_encoding(this);
19141 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
19142 } else {
19143 __ movddup($dst$$XMMRegister, $mem$$Address);
19144 }
19145 %}
19146 ins_pipe( pipe_slow );
19147 %}
19148
19149 // Replicate double (8-byte) scalar immediate to be vector by loading from const table.
19150 instruct ReplD_imm(vec dst, immD con) %{
19151 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19152 match(Set dst (Replicate con));
19153 format %{ "replicateD $dst,$con" %}
19154 ins_encode %{
19155 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
19156 int vlen = Matcher::vector_length_in_bytes(this);
19157 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
19158 %}
19159 ins_pipe( pipe_slow );
19160 %}
19161
19162 instruct ReplD_zero(vec dst, immD0 zero) %{
19163 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
19164 match(Set dst (Replicate zero));
19165 format %{ "replicateD $dst,$zero" %}
19166 ins_encode %{
19167 int vlen_enc = vector_length_encoding(this);
19168 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
19169 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
19170 } else {
19171 __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
19172 }
19173 %}
19174 ins_pipe( fpu_reg_reg );
19175 %}
19176
19177 // ====================VECTOR INSERT=======================================
19178
19179 instruct insert(vec dst, rRegI val, immU8 idx) %{
19180 predicate(Matcher::vector_length_in_bytes(n) < 32);
19181 match(Set dst (VectorInsert (Binary dst val) idx));
19182 format %{ "vector_insert $dst,$val,$idx" %}
19183 ins_encode %{
19184 assert(UseSSE >= 4, "required");
19185 assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
19186
19187 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19188
19189 assert(is_integral_type(elem_bt), "");
19190 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19191
19192 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
19193 %}
19194 ins_pipe( pipe_slow );
19195 %}
19196
19197 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
19198 predicate(Matcher::vector_length_in_bytes(n) == 32);
19199 match(Set dst (VectorInsert (Binary src val) idx));
19200 effect(TEMP vtmp);
19201 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19202 ins_encode %{
19203 int vlen_enc = Assembler::AVX_256bit;
19204 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19205 int elem_per_lane = 16/type2aelembytes(elem_bt);
19206 int log2epr = log2(elem_per_lane);
19207
19208 assert(is_integral_type(elem_bt), "sanity");
19209 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19210
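    // Split the element index into a 128-bit lane index (y_idx) and an index
    // within that lane (x_idx): extract the lane, insert the scalar, then
    // write the lane back.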
19211 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19212 uint y_idx = ($idx$$constant >> log2epr) & 1;
19213 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19214 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19215 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19216 %}
19217 ins_pipe( pipe_slow );
19218 %}
19219
19220 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
19221 predicate(Matcher::vector_length_in_bytes(n) == 64);
19222 match(Set dst (VectorInsert (Binary src val) idx));
19223 effect(TEMP vtmp);
19224 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19225 ins_encode %{
19226 assert(UseAVX > 2, "sanity");
19227
19228 BasicType elem_bt = Matcher::vector_element_basic_type(this);
19229 int elem_per_lane = 16/type2aelembytes(elem_bt);
19230 int log2epr = log2(elem_per_lane);
19231
19232 assert(is_integral_type(elem_bt), "");
19233 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19234
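    // Same lane-split scheme as insert32, but the upper index bits select one
    // of four 128-bit lanes.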
19235 uint x_idx = $idx$$constant & right_n_bits(log2epr);
19236 uint y_idx = ($idx$$constant >> log2epr) & 3;
19237 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19238 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19239 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19240 %}
19241 ins_pipe( pipe_slow );
19242 %}
19243
19244 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
19245 predicate(Matcher::vector_length(n) == 2);
19246 match(Set dst (VectorInsert (Binary dst val) idx));
19247 format %{ "vector_insert $dst,$val,$idx" %}
19248 ins_encode %{
19249 assert(UseSSE >= 4, "required");
19250 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19251 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19252
19253 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
19254 %}
19255 ins_pipe( pipe_slow );
19256 %}
19257
19258 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
19259 predicate(Matcher::vector_length(n) == 4);
19260 match(Set dst (VectorInsert (Binary src val) idx));
19261 effect(TEMP vtmp);
19262 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19263 ins_encode %{
19264 assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
19265 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19266
19267 uint x_idx = $idx$$constant & right_n_bits(1);
19268 uint y_idx = ($idx$$constant >> 1) & 1;
19269 int vlen_enc = Assembler::AVX_256bit;
19270 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19271 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19272 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19273 %}
19274 ins_pipe( pipe_slow );
19275 %}
19276
19277 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
19278 predicate(Matcher::vector_length(n) == 8);
19279 match(Set dst (VectorInsert (Binary src val) idx));
19280 effect(TEMP vtmp);
19281 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19282 ins_encode %{
19283 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
19284 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19285
19286 uint x_idx = $idx$$constant & right_n_bits(1);
19287 uint y_idx = ($idx$$constant >> 1) & 3;
19288 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19289 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
19290 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19291 %}
19292 ins_pipe( pipe_slow );
19293 %}
19294
19295 instruct insertF(vec dst, regF val, immU8 idx) %{
19296 predicate(Matcher::vector_length(n) < 8);
19297 match(Set dst (VectorInsert (Binary dst val) idx));
19298 format %{ "vector_insert $dst,$val,$idx" %}
19299 ins_encode %{
19300 assert(UseSSE >= 4, "sanity");
19301
19302 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19303 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19304
19305 uint x_idx = $idx$$constant & right_n_bits(2);
19306 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19307 %}
19308 ins_pipe( pipe_slow );
19309 %}
19310
19311 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
19312 predicate(Matcher::vector_length(n) >= 8);
19313 match(Set dst (VectorInsert (Binary src val) idx));
19314 effect(TEMP vtmp);
19315 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
19316 ins_encode %{
19317 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
19318 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19319
19320 int vlen = Matcher::vector_length(this);
19321 uint x_idx = $idx$$constant & right_n_bits(2);
19322 if (vlen == 8) {
19323 uint y_idx = ($idx$$constant >> 2) & 1;
19324 int vlen_enc = Assembler::AVX_256bit;
19325 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19326 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19327 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19328 } else {
19329 assert(vlen == 16, "sanity");
19330 uint y_idx = ($idx$$constant >> 2) & 3;
19331 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19332 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
19333 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19334 }
19335 %}
19336 ins_pipe( pipe_slow );
19337 %}
19338
19339 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
19340 predicate(Matcher::vector_length(n) == 2);
19341 match(Set dst (VectorInsert (Binary dst val) idx));
19342 effect(TEMP tmp);
19343 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
19344 ins_encode %{
19345 assert(UseSSE >= 4, "sanity");
19346 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19347 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19348
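    // Move the double's bit pattern through a GPR so the 64-bit element can
    // be inserted with pinsrq.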
19349 __ movq($tmp$$Register, $val$$XMMRegister);
19350 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
19351 %}
19352 ins_pipe( pipe_slow );
19353 %}
19354
19355 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
19356 predicate(Matcher::vector_length(n) == 4);
19357 match(Set dst (VectorInsert (Binary src val) idx));
19358 effect(TEMP vtmp, TEMP tmp);
19359 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19360 ins_encode %{
19361 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19362 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19363
19364 uint x_idx = $idx$$constant & right_n_bits(1);
19365 uint y_idx = ($idx$$constant >> 1) & 1;
19366 int vlen_enc = Assembler::AVX_256bit;
19367 __ movq($tmp$$Register, $val$$XMMRegister);
19368 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19369 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19370 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19371 %}
19372 ins_pipe( pipe_slow );
19373 %}
19374
19375 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
19376 predicate(Matcher::vector_length(n) == 8);
19377 match(Set dst (VectorInsert (Binary src val) idx));
19378 effect(TEMP tmp, TEMP vtmp);
19379 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
19380 ins_encode %{
19381 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
19382 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
19383
19384 uint x_idx = $idx$$constant & right_n_bits(1);
19385 uint y_idx = ($idx$$constant >> 1) & 3;
19386 __ movq($tmp$$Register, $val$$XMMRegister);
19387 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
19388 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
19389 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
19390 %}
19391 ins_pipe( pipe_slow );
19392 %}
19393
19394 // ====================REDUCTION ARITHMETIC=======================================
19395
19396 // =======================Int Reduction==========================================
19397
19398 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19399 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
19400 match(Set dst (AddReductionVI src1 src2));
19401 match(Set dst (MulReductionVI src1 src2));
19402 match(Set dst (AndReductionV src1 src2));
19403 match(Set dst ( OrReductionV src1 src2));
19404 match(Set dst (XorReductionV src1 src2));
19405 match(Set dst (MinReductionV src1 src2));
19406 match(Set dst (MaxReductionV src1 src2));
19407 match(Set dst (UMinReductionV src1 src2));
19408 match(Set dst (UMaxReductionV src1 src2));
19409 effect(TEMP vtmp1, TEMP vtmp2);
19410 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19411 ins_encode %{
19412 int opcode = this->ideal_Opcode();
19413 int vlen = Matcher::vector_length(this, $src2);
19414 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19415 %}
19416 ins_pipe( pipe_slow );
19417 %}
19418
19419 // =======================Long Reduction==========================================
19420
19421 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19422 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
19423 match(Set dst (AddReductionVL src1 src2));
19424 match(Set dst (MulReductionVL src1 src2));
19425 match(Set dst (AndReductionV src1 src2));
19426 match(Set dst ( OrReductionV src1 src2));
19427 match(Set dst (XorReductionV src1 src2));
19428 match(Set dst (MinReductionV src1 src2));
19429 match(Set dst (MaxReductionV src1 src2));
19430 match(Set dst (UMinReductionV src1 src2));
19431 match(Set dst (UMaxReductionV src1 src2));
19432 effect(TEMP vtmp1, TEMP vtmp2);
19433 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19434 ins_encode %{
19435 int opcode = this->ideal_Opcode();
19436 int vlen = Matcher::vector_length(this, $src2);
19437 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19438 %}
19439 ins_pipe( pipe_slow );
19440 %}
19441
19442 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
19443 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
19444 match(Set dst (AddReductionVL src1 src2));
19445 match(Set dst (MulReductionVL src1 src2));
19446 match(Set dst (AndReductionV src1 src2));
19447 match(Set dst ( OrReductionV src1 src2));
19448 match(Set dst (XorReductionV src1 src2));
19449 match(Set dst (MinReductionV src1 src2));
19450 match(Set dst (MaxReductionV src1 src2));
19451 match(Set dst (UMinReductionV src1 src2));
19452 match(Set dst (UMaxReductionV src1 src2));
19453 effect(TEMP vtmp1, TEMP vtmp2);
19454 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19455 ins_encode %{
19456 int opcode = this->ideal_Opcode();
19457 int vlen = Matcher::vector_length(this, $src2);
19458 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19459 %}
19460 ins_pipe( pipe_slow );
19461 %}
19462
19463 // =======================Float Reduction==========================================
19464
19465 instruct reductionF128(regF dst, vec src, vec vtmp) %{
19466 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
19467 match(Set dst (AddReductionVF dst src));
19468 match(Set dst (MulReductionVF dst src));
19469 effect(TEMP dst, TEMP vtmp);
19470 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
19471 ins_encode %{
19472 int opcode = this->ideal_Opcode();
19473 int vlen = Matcher::vector_length(this, $src);
19474 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19475 %}
19476 ins_pipe( pipe_slow );
19477 %}
19478
19479 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
19480 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19481 match(Set dst (AddReductionVF dst src));
19482 match(Set dst (MulReductionVF dst src));
19483 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19484 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19485 ins_encode %{
19486 int opcode = this->ideal_Opcode();
19487 int vlen = Matcher::vector_length(this, $src);
19488 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19489 %}
19490 ins_pipe( pipe_slow );
19491 %}
19492
19493 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19494 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
19495 match(Set dst (AddReductionVF dst src));
19496 match(Set dst (MulReductionVF dst src));
19497 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19498 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19499 ins_encode %{
19500 int opcode = this->ideal_Opcode();
19501 int vlen = Matcher::vector_length(this, $src);
19502 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19503 %}
19504 ins_pipe( pipe_slow );
19505 %}
19506
19507
19508 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19509 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19510 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19511 // src1 contains reduction identity
19512 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19513 match(Set dst (AddReductionVF src1 src2));
19514 match(Set dst (MulReductionVF src1 src2));
19515 effect(TEMP dst);
19516 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
19517 ins_encode %{
19518 int opcode = this->ideal_Opcode();
19519 int vlen = Matcher::vector_length(this, $src2);
19520 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19521 %}
19522 ins_pipe( pipe_slow );
19523 %}
19524
19525 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19526 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19527 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19528 // src1 contains reduction identity
19529 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19530 match(Set dst (AddReductionVF src1 src2));
19531 match(Set dst (MulReductionVF src1 src2));
19532 effect(TEMP dst, TEMP vtmp);
19533 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19534 ins_encode %{
19535 int opcode = this->ideal_Opcode();
19536 int vlen = Matcher::vector_length(this, $src2);
19537 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19538 %}
19539 ins_pipe( pipe_slow );
19540 %}
19541
19542 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19543 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19544 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19545 // src1 contains reduction identity
19546 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19547 match(Set dst (AddReductionVF src1 src2));
19548 match(Set dst (MulReductionVF src1 src2));
19549 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19550 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19551 ins_encode %{
19552 int opcode = this->ideal_Opcode();
19553 int vlen = Matcher::vector_length(this, $src2);
19554 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19555 %}
19556 ins_pipe( pipe_slow );
19557 %}
19558
19559 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19560 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19561 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19562 // src1 contains reduction identity
19563 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19564 match(Set dst (AddReductionVF src1 src2));
19565 match(Set dst (MulReductionVF src1 src2));
19566 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19567 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19568 ins_encode %{
19569 int opcode = this->ideal_Opcode();
19570 int vlen = Matcher::vector_length(this, $src2);
19571 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19572 %}
19573 ins_pipe( pipe_slow );
19574 %}
19575
19576 // =======================Double Reduction==========================================
19577
19578 instruct reduction2D(regD dst, vec src, vec vtmp) %{
19579 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19580 match(Set dst (AddReductionVD dst src));
19581 match(Set dst (MulReductionVD dst src));
19582 effect(TEMP dst, TEMP vtmp);
19583 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19584 ins_encode %{
19585 int opcode = this->ideal_Opcode();
19586 int vlen = Matcher::vector_length(this, $src);
19587 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19588 %}
19589 ins_pipe( pipe_slow );
19590 %}
19591
19592 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
19593 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19594 match(Set dst (AddReductionVD dst src));
19595 match(Set dst (MulReductionVD dst src));
19596 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19597 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19598 ins_encode %{
19599 int opcode = this->ideal_Opcode();
19600 int vlen = Matcher::vector_length(this, $src);
19601 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19602 %}
19603 ins_pipe( pipe_slow );
19604 %}
19605
19606 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
19607 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19608 match(Set dst (AddReductionVD dst src));
19609 match(Set dst (MulReductionVD dst src));
19610 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19611 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19612 ins_encode %{
19613 int opcode = this->ideal_Opcode();
19614 int vlen = Matcher::vector_length(this, $src);
19615 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19616 %}
19617 ins_pipe( pipe_slow );
19618 %}
19619
19620 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19621 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19622 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19623 // src1 contains reduction identity
19624 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19625 match(Set dst (AddReductionVD src1 src2));
19626 match(Set dst (MulReductionVD src1 src2));
19627 effect(TEMP dst);
19628 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19629 ins_encode %{
19630 int opcode = this->ideal_Opcode();
19631 int vlen = Matcher::vector_length(this, $src2);
19632 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19633 %}
19634 ins_pipe( pipe_slow );
19635 %}
19636
19637 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19638 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19639 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19640 // src1 contains reduction identity
19641 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19642 match(Set dst (AddReductionVD src1 src2));
19643 match(Set dst (MulReductionVD src1 src2));
19644 effect(TEMP dst, TEMP vtmp);
19645 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19646 ins_encode %{
19647 int opcode = this->ideal_Opcode();
19648 int vlen = Matcher::vector_length(this, $src2);
19649 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19650 %}
19651 ins_pipe( pipe_slow );
19652 %}
19653
19654 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19655 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19656 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19657 // src1 contains reduction identity
19658 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19659 match(Set dst (AddReductionVD src1 src2));
19660 match(Set dst (MulReductionVD src1 src2));
19661 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19662 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19663 ins_encode %{
19664 int opcode = this->ideal_Opcode();
19665 int vlen = Matcher::vector_length(this, $src2);
19666 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19667 %}
19668 ins_pipe( pipe_slow );
19669 %}
19670
19671 // =======================Byte Reduction==========================================
19672
19673 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19674 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19675 match(Set dst (AddReductionVI src1 src2));
19676 match(Set dst (AndReductionV src1 src2));
19677 match(Set dst ( OrReductionV src1 src2));
19678 match(Set dst (XorReductionV src1 src2));
19679 match(Set dst (MinReductionV src1 src2));
19680 match(Set dst (MaxReductionV src1 src2));
19681 match(Set dst (UMinReductionV src1 src2));
19682 match(Set dst (UMaxReductionV src1 src2));
19683 effect(TEMP vtmp1, TEMP vtmp2);
19684 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19685 ins_encode %{
19686 int opcode = this->ideal_Opcode();
19687 int vlen = Matcher::vector_length(this, $src2);
19688 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19689 %}
19690 ins_pipe( pipe_slow );
19691 %}
19692
19693 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19694 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19695 match(Set dst (AddReductionVI src1 src2));
19696 match(Set dst (AndReductionV src1 src2));
19697 match(Set dst ( OrReductionV src1 src2));
19698 match(Set dst (XorReductionV src1 src2));
19699 match(Set dst (MinReductionV src1 src2));
19700 match(Set dst (MaxReductionV src1 src2));
19701 match(Set dst (UMinReductionV src1 src2));
19702 match(Set dst (UMaxReductionV src1 src2));
19703 effect(TEMP vtmp1, TEMP vtmp2);
19704 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19705 ins_encode %{
19706 int opcode = this->ideal_Opcode();
19707 int vlen = Matcher::vector_length(this, $src2);
19708 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19709 %}
19710 ins_pipe( pipe_slow );
19711 %}
19712
19713 // =======================Short Reduction==========================================
19714
19715 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19716 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19717 match(Set dst (AddReductionVI src1 src2));
19718 match(Set dst (MulReductionVI src1 src2));
19719 match(Set dst (AndReductionV src1 src2));
19720 match(Set dst ( OrReductionV src1 src2));
19721 match(Set dst (XorReductionV src1 src2));
19722 match(Set dst (MinReductionV src1 src2));
19723 match(Set dst (MaxReductionV src1 src2));
19724 match(Set dst (UMinReductionV src1 src2));
19725 match(Set dst (UMaxReductionV src1 src2));
19726 effect(TEMP vtmp1, TEMP vtmp2);
19727 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19728 ins_encode %{
19729 int opcode = this->ideal_Opcode();
19730 int vlen = Matcher::vector_length(this, $src2);
19731 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19732 %}
19733 ins_pipe( pipe_slow );
19734 %}
19735
19736 // =======================Mul Reduction==========================================
19737
19738 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
19739 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19740 Matcher::vector_length(n->in(2)) <= 32); // src2
19741 match(Set dst (MulReductionVI src1 src2));
19742 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19743 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19744 ins_encode %{
19745 int opcode = this->ideal_Opcode();
19746 int vlen = Matcher::vector_length(this, $src2);
19747 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19748 %}
19749 ins_pipe( pipe_slow );
19750 %}
19751
19752 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19753 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19754 Matcher::vector_length(n->in(2)) == 64); // src2
19755 match(Set dst (MulReductionVI src1 src2));
19756 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19757 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19758 ins_encode %{
19759 int opcode = this->ideal_Opcode();
19760 int vlen = Matcher::vector_length(this, $src2);
19761 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19762 %}
19763 ins_pipe( pipe_slow );
19764 %}
19765
19766 //--------------------Min/Max Float Reduction --------------------
19767 // Float Min/Max Reduction
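// The immF src1 operand must be the reduction identity (+Inf for min, -Inf
// for max; the predicate enforces this), so the generated code can ignore it
// and reduce src2 alone.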
19768 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19769 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19770 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19771 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19772 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19773 Matcher::vector_length(n->in(2)) == 2);
19774 match(Set dst (MinReductionV src1 src2));
19775 match(Set dst (MaxReductionV src1 src2));
19776 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19777 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19778 ins_encode %{
19779 assert(UseAVX > 0, "sanity");
19780
19781 int opcode = this->ideal_Opcode();
19782 int vlen = Matcher::vector_length(this, $src2);
19783 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19784 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19785 %}
19786 ins_pipe( pipe_slow );
19787 %}
19788
19789 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19790 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19791 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19792 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19793 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19794 Matcher::vector_length(n->in(2)) >= 4);
19795 match(Set dst (MinReductionV src1 src2));
19796 match(Set dst (MaxReductionV src1 src2));
19797 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19798 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19799 ins_encode %{
19800 assert(UseAVX > 0, "sanity");
19801
19802 int opcode = this->ideal_Opcode();
19803 int vlen = Matcher::vector_length(this, $src2);
19804 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19805 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19806 %}
19807 ins_pipe( pipe_slow );
19808 %}
19809
19810 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19811 legVec btmp, legVec xmm_1, rFlagsReg cr) %{
19812 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19813 Matcher::vector_length(n->in(2)) == 2);
19814 match(Set dst (MinReductionV dst src));
19815 match(Set dst (MaxReductionV dst src));
19816 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19817 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19818 ins_encode %{
19819 assert(UseAVX > 0, "sanity");
19820
19821 int opcode = this->ideal_Opcode();
19822 int vlen = Matcher::vector_length(this, $src);
19823 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19824 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19825 %}
19826 ins_pipe( pipe_slow );
19827 %}
19828
19829
19830 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19831 legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
19832 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19833 Matcher::vector_length(n->in(2)) >= 4);
19834 match(Set dst (MinReductionV dst src));
19835 match(Set dst (MaxReductionV dst src));
19836 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19837 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19838 ins_encode %{
19839 assert(UseAVX > 0, "sanity");
19840
19841 int opcode = this->ideal_Opcode();
19842 int vlen = Matcher::vector_length(this, $src);
19843 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19844 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19845 %}
19846 ins_pipe( pipe_slow );
19847 %}
19848
19849 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
19850 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19851 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19852 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19853 Matcher::vector_length(n->in(2)) == 2);
19854 match(Set dst (MinReductionV src1 src2));
19855 match(Set dst (MaxReductionV src1 src2));
19856 effect(TEMP dst, TEMP xtmp1);
19857 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19858 ins_encode %{
19859 int opcode = this->ideal_Opcode();
19860 int vlen = Matcher::vector_length(this, $src2);
19861 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19862 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19863 %}
19864 ins_pipe( pipe_slow );
19865 %}
19866
19867 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
19868 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19869 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19870 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19871 Matcher::vector_length(n->in(2)) >= 4);
19872 match(Set dst (MinReductionV src1 src2));
19873 match(Set dst (MaxReductionV src1 src2));
19874 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19875 format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19876 ins_encode %{
19877 int opcode = this->ideal_Opcode();
19878 int vlen = Matcher::vector_length(this, $src2);
19879 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19880 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19881 %}
19882 ins_pipe( pipe_slow );
19883 %}
19884
19885 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
19886 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19887 Matcher::vector_length(n->in(2)) == 2);
19888 match(Set dst (MinReductionV dst src));
19889 match(Set dst (MaxReductionV dst src));
19890 effect(TEMP dst, TEMP xtmp1);
19891 format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19892 ins_encode %{
19893 int opcode = this->ideal_Opcode();
19894 int vlen = Matcher::vector_length(this, $src);
19895 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19896 $xtmp1$$XMMRegister);
19897 %}
19898 ins_pipe( pipe_slow );
19899 %}
19900
19901 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
19902 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19903 Matcher::vector_length(n->in(2)) >= 4);
19904 match(Set dst (MinReductionV dst src));
19905 match(Set dst (MaxReductionV dst src));
19906 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
19907 format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19908 ins_encode %{
19909 int opcode = this->ideal_Opcode();
19910 int vlen = Matcher::vector_length(this, $src);
19911 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19912 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19913 %}
19914 ins_pipe( pipe_slow );
19915 %}
19916
//--------------------Min/Max Double Reduction --------------------
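// Note: the immD src1 forms below only match when the scalar input is the reduction
// identity (TypeD::POS_INF for MinReductionV, TypeD::NEG_INF for MaxReductionV), as
// enforced by their predicates, so the scalar can be ignored and reduceDoubleMinMax is
// called with 'false' for its third argument. The *_av forms reduce into an accumulating
// $dst instead and pass 'true', signalling that $dst already holds a value to fold in.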
19918 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19919 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19920 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19921 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19922 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19923 Matcher::vector_length(n->in(2)) == 2);
19924 match(Set dst (MinReductionV src1 src2));
19925 match(Set dst (MaxReductionV src1 src2));
19926 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19927 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19928 ins_encode %{
19929 assert(UseAVX > 0, "sanity");
19930
19931 int opcode = this->ideal_Opcode();
19932 int vlen = Matcher::vector_length(this, $src2);
19933 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19934 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19935 %}
19936 ins_pipe( pipe_slow );
19937 %}
19938
19939 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19940 legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19941 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19942 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19943 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19944 Matcher::vector_length(n->in(2)) >= 4);
19945 match(Set dst (MinReductionV src1 src2));
19946 match(Set dst (MaxReductionV src1 src2));
19947 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19948 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19949 ins_encode %{
19950 assert(UseAVX > 0, "sanity");
19951
19952 int opcode = this->ideal_Opcode();
19953 int vlen = Matcher::vector_length(this, $src2);
19954 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19955 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19956 %}
19957 ins_pipe( pipe_slow );
19958 %}
19959
19960
19961 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19962 legVec tmp3, legVec tmp4, rFlagsReg cr) %{
19963 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19964 Matcher::vector_length(n->in(2)) == 2);
19965 match(Set dst (MinReductionV dst src));
19966 match(Set dst (MaxReductionV dst src));
19967 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19968 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19969 ins_encode %{
19970 assert(UseAVX > 0, "sanity");
19971
19972 int opcode = this->ideal_Opcode();
19973 int vlen = Matcher::vector_length(this, $src);
19974 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19975 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19976 %}
19977 ins_pipe( pipe_slow );
19978 %}
19979
19980 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19981 legVec tmp4, legVec tmp5, rFlagsReg cr) %{
19982 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19983 Matcher::vector_length(n->in(2)) >= 4);
19984 match(Set dst (MinReductionV dst src));
19985 match(Set dst (MaxReductionV dst src));
19986 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19987 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19988 ins_encode %{
19989 assert(UseAVX > 0, "sanity");
19990
19991 int opcode = this->ideal_Opcode();
19992 int vlen = Matcher::vector_length(this, $src);
19993 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19994 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19995 %}
19996 ins_pipe( pipe_slow );
19997 %}
19998
19999 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
20000 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20001 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20002 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20003 Matcher::vector_length(n->in(2)) == 2);
20004 match(Set dst (MinReductionV src1 src2));
20005 match(Set dst (MaxReductionV src1 src2));
20006 effect(TEMP dst, TEMP xtmp1);
20007 format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20008 ins_encode %{
20009 int opcode = this->ideal_Opcode();
20010 int vlen = Matcher::vector_length(this, $src2);
20011 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20012 xnoreg, xnoreg, $xtmp1$$XMMRegister);
20013 %}
20014 ins_pipe( pipe_slow );
20015 %}
20016
20017 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
20018 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20019 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20020 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20021 Matcher::vector_length(n->in(2)) >= 4);
20022 match(Set dst (MinReductionV src1 src2));
20023 match(Set dst (MaxReductionV src1 src2));
20024 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20025 format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20026 ins_encode %{
20027 int opcode = this->ideal_Opcode();
20028 int vlen = Matcher::vector_length(this, $src2);
20029 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20030 xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20031 %}
20032 ins_pipe( pipe_slow );
20033 %}
20034
20035
20036 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
20037 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20038 Matcher::vector_length(n->in(2)) == 2);
20039 match(Set dst (MinReductionV dst src));
20040 match(Set dst (MaxReductionV dst src));
20041 effect(TEMP dst, TEMP xtmp1);
20042 format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20043 ins_encode %{
20044 int opcode = this->ideal_Opcode();
20045 int vlen = Matcher::vector_length(this, $src);
20046 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20047 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20048 %}
20049 ins_pipe( pipe_slow );
20050 %}
20051
20052 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
20053 predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20054 Matcher::vector_length(n->in(2)) >= 4);
20055 match(Set dst (MinReductionV dst src));
20056 match(Set dst (MaxReductionV dst src));
20057 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20058 format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20059 ins_encode %{
20060 int opcode = this->ideal_Opcode();
20061 int vlen = Matcher::vector_length(this, $src);
20062 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20063 xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20064 %}
20065 ins_pipe( pipe_slow );
20066 %}
20067
20068 // ====================VECTOR ARITHMETIC=======================================
20069
20070 // --------------------------------- ADD --------------------------------------
20071
20072 // Bytes vector add
20073 instruct vaddB(vec dst, vec src) %{
20074 predicate(UseAVX == 0);
20075 match(Set dst (AddVB dst src));
20076 format %{ "paddb $dst,$src\t! add packedB" %}
20077 ins_encode %{
20078 __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20079 %}
20080 ins_pipe( pipe_slow );
20081 %}
20082
20083 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
20084 predicate(UseAVX > 0);
20085 match(Set dst (AddVB src1 src2));
20086 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
20087 ins_encode %{
20088 int vlen_enc = vector_length_encoding(this);
20089 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20090 %}
20091 ins_pipe( pipe_slow );
20092 %}
20093
20094 instruct vaddB_mem(vec dst, vec src, memory mem) %{
20095 predicate((UseAVX > 0) &&
20096 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20097 match(Set dst (AddVB src (LoadVector mem)));
20098 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
20099 ins_encode %{
20100 int vlen_enc = vector_length_encoding(this);
20101 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20102 %}
20103 ins_pipe( pipe_slow );
20104 %}
20105
20106 // Shorts/Chars vector add
20107 instruct vaddS(vec dst, vec src) %{
20108 predicate(UseAVX == 0);
20109 match(Set dst (AddVS dst src));
20110 format %{ "paddw $dst,$src\t! add packedS" %}
20111 ins_encode %{
20112 __ paddw($dst$$XMMRegister, $src$$XMMRegister);
20113 %}
20114 ins_pipe( pipe_slow );
20115 %}
20116
20117 instruct vaddS_reg(vec dst, vec src1, vec src2) %{
20118 predicate(UseAVX > 0);
20119 match(Set dst (AddVS src1 src2));
20120 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
20121 ins_encode %{
20122 int vlen_enc = vector_length_encoding(this);
20123 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20124 %}
20125 ins_pipe( pipe_slow );
20126 %}
20127
20128 instruct vaddS_mem(vec dst, vec src, memory mem) %{
20129 predicate((UseAVX > 0) &&
20130 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20131 match(Set dst (AddVS src (LoadVector mem)));
20132 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
20133 ins_encode %{
20134 int vlen_enc = vector_length_encoding(this);
20135 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20136 %}
20137 ins_pipe( pipe_slow );
20138 %}
20139
20140 // Integers vector add
20141 instruct vaddI(vec dst, vec src) %{
20142 predicate(UseAVX == 0);
20143 match(Set dst (AddVI dst src));
20144 format %{ "paddd $dst,$src\t! add packedI" %}
20145 ins_encode %{
20146 __ paddd($dst$$XMMRegister, $src$$XMMRegister);
20147 %}
20148 ins_pipe( pipe_slow );
20149 %}
20150
20151 instruct vaddI_reg(vec dst, vec src1, vec src2) %{
20152 predicate(UseAVX > 0);
20153 match(Set dst (AddVI src1 src2));
20154 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
20155 ins_encode %{
20156 int vlen_enc = vector_length_encoding(this);
20157 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20158 %}
20159 ins_pipe( pipe_slow );
20160 %}
20161
20162
20163 instruct vaddI_mem(vec dst, vec src, memory mem) %{
20164 predicate((UseAVX > 0) &&
20165 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20166 match(Set dst (AddVI src (LoadVector mem)));
20167 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
20168 ins_encode %{
20169 int vlen_enc = vector_length_encoding(this);
20170 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20171 %}
20172 ins_pipe( pipe_slow );
20173 %}
20174
20175 // Longs vector add
20176 instruct vaddL(vec dst, vec src) %{
20177 predicate(UseAVX == 0);
20178 match(Set dst (AddVL dst src));
20179 format %{ "paddq $dst,$src\t! add packedL" %}
20180 ins_encode %{
20181 __ paddq($dst$$XMMRegister, $src$$XMMRegister);
20182 %}
20183 ins_pipe( pipe_slow );
20184 %}
20185
20186 instruct vaddL_reg(vec dst, vec src1, vec src2) %{
20187 predicate(UseAVX > 0);
20188 match(Set dst (AddVL src1 src2));
20189 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
20190 ins_encode %{
20191 int vlen_enc = vector_length_encoding(this);
20192 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20193 %}
20194 ins_pipe( pipe_slow );
20195 %}
20196
20197 instruct vaddL_mem(vec dst, vec src, memory mem) %{
20198 predicate((UseAVX > 0) &&
20199 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20200 match(Set dst (AddVL src (LoadVector mem)));
20201 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
20202 ins_encode %{
20203 int vlen_enc = vector_length_encoding(this);
20204 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20205 %}
20206 ins_pipe( pipe_slow );
20207 %}
20208
20209 // Floats vector add
20210 instruct vaddF(vec dst, vec src) %{
20211 predicate(UseAVX == 0);
20212 match(Set dst (AddVF dst src));
20213 format %{ "addps $dst,$src\t! add packedF" %}
20214 ins_encode %{
20215 __ addps($dst$$XMMRegister, $src$$XMMRegister);
20216 %}
20217 ins_pipe( pipe_slow );
20218 %}
20219
20220 instruct vaddF_reg(vec dst, vec src1, vec src2) %{
20221 predicate(UseAVX > 0);
20222 match(Set dst (AddVF src1 src2));
20223 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
20224 ins_encode %{
20225 int vlen_enc = vector_length_encoding(this);
20226 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20227 %}
20228 ins_pipe( pipe_slow );
20229 %}
20230
20231 instruct vaddF_mem(vec dst, vec src, memory mem) %{
20232 predicate((UseAVX > 0) &&
20233 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20234 match(Set dst (AddVF src (LoadVector mem)));
20235 format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
20236 ins_encode %{
20237 int vlen_enc = vector_length_encoding(this);
20238 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20239 %}
20240 ins_pipe( pipe_slow );
20241 %}
20242
20243 // Doubles vector add
20244 instruct vaddD(vec dst, vec src) %{
20245 predicate(UseAVX == 0);
20246 match(Set dst (AddVD dst src));
20247 format %{ "addpd $dst,$src\t! add packedD" %}
20248 ins_encode %{
20249 __ addpd($dst$$XMMRegister, $src$$XMMRegister);
20250 %}
20251 ins_pipe( pipe_slow );
20252 %}
20253
20254 instruct vaddD_reg(vec dst, vec src1, vec src2) %{
20255 predicate(UseAVX > 0);
20256 match(Set dst (AddVD src1 src2));
20257 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
20258 ins_encode %{
20259 int vlen_enc = vector_length_encoding(this);
20260 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20261 %}
20262 ins_pipe( pipe_slow );
20263 %}
20264
20265 instruct vaddD_mem(vec dst, vec src, memory mem) %{
20266 predicate((UseAVX > 0) &&
20267 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20268 match(Set dst (AddVD src (LoadVector mem)));
20269 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
20270 ins_encode %{
20271 int vlen_enc = vector_length_encoding(this);
20272 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20273 %}
20274 ins_pipe( pipe_slow );
20275 %}
20276
20277 // --------------------------------- SUB --------------------------------------
20278
20279 // Bytes vector sub
20280 instruct vsubB(vec dst, vec src) %{
20281 predicate(UseAVX == 0);
20282 match(Set dst (SubVB dst src));
20283 format %{ "psubb $dst,$src\t! sub packedB" %}
20284 ins_encode %{
20285 __ psubb($dst$$XMMRegister, $src$$XMMRegister);
20286 %}
20287 ins_pipe( pipe_slow );
20288 %}
20289
20290 instruct vsubB_reg(vec dst, vec src1, vec src2) %{
20291 predicate(UseAVX > 0);
20292 match(Set dst (SubVB src1 src2));
20293 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
20294 ins_encode %{
20295 int vlen_enc = vector_length_encoding(this);
20296 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20297 %}
20298 ins_pipe( pipe_slow );
20299 %}
20300
20301 instruct vsubB_mem(vec dst, vec src, memory mem) %{
20302 predicate((UseAVX > 0) &&
20303 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20304 match(Set dst (SubVB src (LoadVector mem)));
20305 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
20306 ins_encode %{
20307 int vlen_enc = vector_length_encoding(this);
20308 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20309 %}
20310 ins_pipe( pipe_slow );
20311 %}
20312
20313 // Shorts/Chars vector sub
20314 instruct vsubS(vec dst, vec src) %{
20315 predicate(UseAVX == 0);
20316 match(Set dst (SubVS dst src));
20317 format %{ "psubw $dst,$src\t! sub packedS" %}
20318 ins_encode %{
20319 __ psubw($dst$$XMMRegister, $src$$XMMRegister);
20320 %}
20321 ins_pipe( pipe_slow );
20322 %}
20323
20324
20325 instruct vsubS_reg(vec dst, vec src1, vec src2) %{
20326 predicate(UseAVX > 0);
20327 match(Set dst (SubVS src1 src2));
20328 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
20329 ins_encode %{
20330 int vlen_enc = vector_length_encoding(this);
20331 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20332 %}
20333 ins_pipe( pipe_slow );
20334 %}
20335
20336 instruct vsubS_mem(vec dst, vec src, memory mem) %{
20337 predicate((UseAVX > 0) &&
20338 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20339 match(Set dst (SubVS src (LoadVector mem)));
20340 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
20341 ins_encode %{
20342 int vlen_enc = vector_length_encoding(this);
20343 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20344 %}
20345 ins_pipe( pipe_slow );
20346 %}
20347
20348 // Integers vector sub
20349 instruct vsubI(vec dst, vec src) %{
20350 predicate(UseAVX == 0);
20351 match(Set dst (SubVI dst src));
20352 format %{ "psubd $dst,$src\t! sub packedI" %}
20353 ins_encode %{
20354 __ psubd($dst$$XMMRegister, $src$$XMMRegister);
20355 %}
20356 ins_pipe( pipe_slow );
20357 %}
20358
20359 instruct vsubI_reg(vec dst, vec src1, vec src2) %{
20360 predicate(UseAVX > 0);
20361 match(Set dst (SubVI src1 src2));
20362 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
20363 ins_encode %{
20364 int vlen_enc = vector_length_encoding(this);
20365 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20366 %}
20367 ins_pipe( pipe_slow );
20368 %}
20369
20370 instruct vsubI_mem(vec dst, vec src, memory mem) %{
20371 predicate((UseAVX > 0) &&
20372 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20373 match(Set dst (SubVI src (LoadVector mem)));
20374 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
20375 ins_encode %{
20376 int vlen_enc = vector_length_encoding(this);
20377 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20378 %}
20379 ins_pipe( pipe_slow );
20380 %}
20381
20382 // Longs vector sub
20383 instruct vsubL(vec dst, vec src) %{
20384 predicate(UseAVX == 0);
20385 match(Set dst (SubVL dst src));
20386 format %{ "psubq $dst,$src\t! sub packedL" %}
20387 ins_encode %{
20388 __ psubq($dst$$XMMRegister, $src$$XMMRegister);
20389 %}
20390 ins_pipe( pipe_slow );
20391 %}
20392
20393 instruct vsubL_reg(vec dst, vec src1, vec src2) %{
20394 predicate(UseAVX > 0);
20395 match(Set dst (SubVL src1 src2));
20396 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
20397 ins_encode %{
20398 int vlen_enc = vector_length_encoding(this);
20399 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20400 %}
20401 ins_pipe( pipe_slow );
20402 %}
20403
20404
20405 instruct vsubL_mem(vec dst, vec src, memory mem) %{
20406 predicate((UseAVX > 0) &&
20407 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20408 match(Set dst (SubVL src (LoadVector mem)));
20409 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
20410 ins_encode %{
20411 int vlen_enc = vector_length_encoding(this);
20412 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20413 %}
20414 ins_pipe( pipe_slow );
20415 %}
20416
20417 // Floats vector sub
20418 instruct vsubF(vec dst, vec src) %{
20419 predicate(UseAVX == 0);
20420 match(Set dst (SubVF dst src));
20421 format %{ "subps $dst,$src\t! sub packedF" %}
20422 ins_encode %{
20423 __ subps($dst$$XMMRegister, $src$$XMMRegister);
20424 %}
20425 ins_pipe( pipe_slow );
20426 %}
20427
20428 instruct vsubF_reg(vec dst, vec src1, vec src2) %{
20429 predicate(UseAVX > 0);
20430 match(Set dst (SubVF src1 src2));
20431 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
20432 ins_encode %{
20433 int vlen_enc = vector_length_encoding(this);
20434 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20435 %}
20436 ins_pipe( pipe_slow );
20437 %}
20438
20439 instruct vsubF_mem(vec dst, vec src, memory mem) %{
20440 predicate((UseAVX > 0) &&
20441 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20442 match(Set dst (SubVF src (LoadVector mem)));
20443 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
20444 ins_encode %{
20445 int vlen_enc = vector_length_encoding(this);
20446 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20447 %}
20448 ins_pipe( pipe_slow );
20449 %}
20450
20451 // Doubles vector sub
20452 instruct vsubD(vec dst, vec src) %{
20453 predicate(UseAVX == 0);
20454 match(Set dst (SubVD dst src));
20455 format %{ "subpd $dst,$src\t! sub packedD" %}
20456 ins_encode %{
20457 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20458 %}
20459 ins_pipe( pipe_slow );
20460 %}
20461
20462 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20463 predicate(UseAVX > 0);
20464 match(Set dst (SubVD src1 src2));
20465 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20466 ins_encode %{
20467 int vlen_enc = vector_length_encoding(this);
20468 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20469 %}
20470 ins_pipe( pipe_slow );
20471 %}
20472
20473 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20474 predicate((UseAVX > 0) &&
20475 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20476 match(Set dst (SubVD src (LoadVector mem)));
20477 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20478 ins_encode %{
20479 int vlen_enc = vector_length_encoding(this);
20480 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20481 %}
20482 ins_pipe( pipe_slow );
20483 %}
20484
20485 // --------------------------------- MUL --------------------------------------
20486
20487 // Byte vector mul
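// There is no byte-wise SIMD multiply on x86, so MulVB is emulated below: the byte lanes
// are widened to 16-bit words (or the odd and even byte lanes are multiplied separately
// as words), the low 8 bits of each 16-bit product are kept, and the results are packed
// or OR-merged back into byte lanes.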
20488 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20489 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20490 match(Set dst (MulVB src1 src2));
20491 effect(TEMP dst, TEMP xtmp);
20492 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20493 ins_encode %{
20494 assert(UseSSE > 3, "required");
20495 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20496 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20497 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20498 __ psllw($dst$$XMMRegister, 8);
20499 __ psrlw($dst$$XMMRegister, 8);
20500 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20501 %}
20502 ins_pipe( pipe_slow );
20503 %}
20504
20505 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20506 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20507 match(Set dst (MulVB src1 src2));
20508 effect(TEMP dst, TEMP xtmp);
20509 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20510 ins_encode %{
20511 assert(UseSSE > 3, "required");
20512 // Odd-index elements
20513 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20514 __ psrlw($dst$$XMMRegister, 8);
20515 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20516 __ psrlw($xtmp$$XMMRegister, 8);
20517 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20518 __ psllw($dst$$XMMRegister, 8);
20519 // Even-index elements
20520 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20521 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20522 __ psllw($xtmp$$XMMRegister, 8);
20523 __ psrlw($xtmp$$XMMRegister, 8);
20524 // Combine
20525 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20526 %}
20527 ins_pipe( pipe_slow );
20528 %}
20529
20530 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20531 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20532 match(Set dst (MulVB src1 src2));
20533 effect(TEMP xtmp1, TEMP xtmp2);
20534 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20535 ins_encode %{
20536 int vlen_enc = vector_length_encoding(this);
20537 // Odd-index elements
20538 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20539 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20540 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20541 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20542 // Even-index elements
20543 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20544 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20545 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20546 // Combine
20547 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20548 %}
20549 ins_pipe( pipe_slow );
20550 %}
20551
20552 // Shorts/Chars vector mul
20553 instruct vmulS(vec dst, vec src) %{
20554 predicate(UseAVX == 0);
20555 match(Set dst (MulVS dst src));
20556 format %{ "pmullw $dst,$src\t! mul packedS" %}
20557 ins_encode %{
20558 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20559 %}
20560 ins_pipe( pipe_slow );
20561 %}
20562
20563 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20564 predicate(UseAVX > 0);
20565 match(Set dst (MulVS src1 src2));
20566 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20567 ins_encode %{
20568 int vlen_enc = vector_length_encoding(this);
20569 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20570 %}
20571 ins_pipe( pipe_slow );
20572 %}
20573
20574 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20575 predicate((UseAVX > 0) &&
20576 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20577 match(Set dst (MulVS src (LoadVector mem)));
20578 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20579 ins_encode %{
20580 int vlen_enc = vector_length_encoding(this);
20581 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20582 %}
20583 ins_pipe( pipe_slow );
20584 %}
20585
20586 // Integers vector mul
20587 instruct vmulI(vec dst, vec src) %{
20588 predicate(UseAVX == 0);
20589 match(Set dst (MulVI dst src));
20590 format %{ "pmulld $dst,$src\t! mul packedI" %}
20591 ins_encode %{
20592 assert(UseSSE > 3, "required");
20593 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20594 %}
20595 ins_pipe( pipe_slow );
20596 %}
20597
20598 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20599 predicate(UseAVX > 0);
20600 match(Set dst (MulVI src1 src2));
20601 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20602 ins_encode %{
20603 int vlen_enc = vector_length_encoding(this);
20604 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20605 %}
20606 ins_pipe( pipe_slow );
20607 %}
20608
20609 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20610 predicate((UseAVX > 0) &&
20611 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20612 match(Set dst (MulVI src (LoadVector mem)));
20613 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20614 ins_encode %{
20615 int vlen_enc = vector_length_encoding(this);
20616 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20617 %}
20618 ins_pipe( pipe_slow );
20619 %}
20620
20621 // Longs vector mul
20622 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20623 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20624 VM_Version::supports_avx512dq()) ||
20625 VM_Version::supports_avx512vldq());
20626 match(Set dst (MulVL src1 src2));
20627 ins_cost(500);
20628 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20629 ins_encode %{
20630 assert(UseAVX > 2, "required");
20631 int vlen_enc = vector_length_encoding(this);
20632 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20633 %}
20634 ins_pipe( pipe_slow );
20635 %}
20636
20637 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20638 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20639 VM_Version::supports_avx512dq()) ||
20640 (Matcher::vector_length_in_bytes(n) > 8 &&
20641 VM_Version::supports_avx512vldq()));
20642 match(Set dst (MulVL src (LoadVector mem)));
20643 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20644 ins_cost(500);
20645 ins_encode %{
20646 assert(UseAVX > 2, "required");
20647 int vlen_enc = vector_length_encoding(this);
20648 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20649 %}
20650 ins_pipe( pipe_slow );
20651 %}
20652
20653 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20654 predicate(UseAVX == 0);
20655 match(Set dst (MulVL src1 src2));
20656 ins_cost(500);
20657 effect(TEMP dst, TEMP xtmp);
20658 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20659 ins_encode %{
20660 assert(VM_Version::supports_sse4_1(), "required");
    // Get the cross (lo*hi) products; only the lower 32 bits of each are of concern
20662 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20663 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20664 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20665 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20666 __ psllq($dst$$XMMRegister, 32);
20667 // Get the lo-lo products
20668 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20669 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20670 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20671 %}
20672 ins_pipe( pipe_slow );
20673 %}
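// Sketch of the decomposition used by vmulL above and vmulL_reg below (no native
// 64x64-bit vector multiply is available here): with a = a_hi:a_lo and b = b_hi:b_lo
// split into 32-bit halves,
//   a * b mod 2^64 = ((a_lo*b_hi + a_hi*b_lo) << 32) + a_lo*b_lo
// The cross products come from pmulld/vpmulld on a dword-swapped copy, and the lo*lo
// term comes from pmuludq/vpmuludq, which produces a full 64-bit unsigned product.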
20674
20675 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20676 predicate(UseAVX > 0 &&
20677 ((Matcher::vector_length_in_bytes(n) == 64 &&
20678 !VM_Version::supports_avx512dq()) ||
20679 (Matcher::vector_length_in_bytes(n) < 64 &&
20680 !VM_Version::supports_avx512vldq())));
20681 match(Set dst (MulVL src1 src2));
20682 effect(TEMP xtmp1, TEMP xtmp2);
20683 ins_cost(500);
20684 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20685 ins_encode %{
20686 int vlen_enc = vector_length_encoding(this);
    // Get the cross (lo*hi) products; only the lower 32 bits of each are of concern
20688 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20689 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20690 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20691 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20692 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20693 // Get the lo-lo products
20694 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20695 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20696 %}
20697 ins_pipe( pipe_slow );
20698 %}
20699
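// When C2 can prove that both MulVL inputs are zero- or sign-extended 32-bit values
// (has_uint_inputs()/has_int_inputs()), the full 64-bit product fits in a single
// vpmuludq/vpmuldq of the low dwords of each lane, hence the cheaper ins_cost(100)
// forms below.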
20700 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20701 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20702 match(Set dst (MulVL src1 src2));
20703 ins_cost(100);
20704 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20705 ins_encode %{
20706 int vlen_enc = vector_length_encoding(this);
20707 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20708 %}
20709 ins_pipe( pipe_slow );
20710 %}
20711
20712 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20713 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20714 match(Set dst (MulVL src1 src2));
20715 ins_cost(100);
20716 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20717 ins_encode %{
20718 int vlen_enc = vector_length_encoding(this);
20719 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20720 %}
20721 ins_pipe( pipe_slow );
20722 %}
20723
20724 // Floats vector mul
20725 instruct vmulF(vec dst, vec src) %{
20726 predicate(UseAVX == 0);
20727 match(Set dst (MulVF dst src));
20728 format %{ "mulps $dst,$src\t! mul packedF" %}
20729 ins_encode %{
20730 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20731 %}
20732 ins_pipe( pipe_slow );
20733 %}
20734
20735 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20736 predicate(UseAVX > 0);
20737 match(Set dst (MulVF src1 src2));
20738 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20739 ins_encode %{
20740 int vlen_enc = vector_length_encoding(this);
20741 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20742 %}
20743 ins_pipe( pipe_slow );
20744 %}
20745
20746 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20747 predicate((UseAVX > 0) &&
20748 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20749 match(Set dst (MulVF src (LoadVector mem)));
20750 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20751 ins_encode %{
20752 int vlen_enc = vector_length_encoding(this);
20753 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20754 %}
20755 ins_pipe( pipe_slow );
20756 %}
20757
20758 // Doubles vector mul
20759 instruct vmulD(vec dst, vec src) %{
20760 predicate(UseAVX == 0);
20761 match(Set dst (MulVD dst src));
20762 format %{ "mulpd $dst,$src\t! mul packedD" %}
20763 ins_encode %{
20764 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20765 %}
20766 ins_pipe( pipe_slow );
20767 %}
20768
20769 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20770 predicate(UseAVX > 0);
20771 match(Set dst (MulVD src1 src2));
20772 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20773 ins_encode %{
20774 int vlen_enc = vector_length_encoding(this);
20775 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20776 %}
20777 ins_pipe( pipe_slow );
20778 %}
20779
20780 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20781 predicate((UseAVX > 0) &&
20782 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20783 match(Set dst (MulVD src (LoadVector mem)));
20784 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20785 ins_encode %{
20786 int vlen_enc = vector_length_encoding(this);
20787 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20788 %}
20789 ins_pipe( pipe_slow );
20790 %}
20791
20792 // --------------------------------- DIV --------------------------------------
20793
20794 // Floats vector div
20795 instruct vdivF(vec dst, vec src) %{
20796 predicate(UseAVX == 0);
20797 match(Set dst (DivVF dst src));
20798 format %{ "divps $dst,$src\t! div packedF" %}
20799 ins_encode %{
20800 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20801 %}
20802 ins_pipe( pipe_slow );
20803 %}
20804
20805 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20806 predicate(UseAVX > 0);
20807 match(Set dst (DivVF src1 src2));
20808 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20809 ins_encode %{
20810 int vlen_enc = vector_length_encoding(this);
20811 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20812 %}
20813 ins_pipe( pipe_slow );
20814 %}
20815
20816 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20817 predicate((UseAVX > 0) &&
20818 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20819 match(Set dst (DivVF src (LoadVector mem)));
20820 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20821 ins_encode %{
20822 int vlen_enc = vector_length_encoding(this);
20823 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20824 %}
20825 ins_pipe( pipe_slow );
20826 %}
20827
20828 // Doubles vector div
20829 instruct vdivD(vec dst, vec src) %{
20830 predicate(UseAVX == 0);
20831 match(Set dst (DivVD dst src));
20832 format %{ "divpd $dst,$src\t! div packedD" %}
20833 ins_encode %{
20834 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20835 %}
20836 ins_pipe( pipe_slow );
20837 %}
20838
20839 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20840 predicate(UseAVX > 0);
20841 match(Set dst (DivVD src1 src2));
20842 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20843 ins_encode %{
20844 int vlen_enc = vector_length_encoding(this);
20845 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20846 %}
20847 ins_pipe( pipe_slow );
20848 %}
20849
20850 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20851 predicate((UseAVX > 0) &&
20852 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20853 match(Set dst (DivVD src (LoadVector mem)));
20854 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20855 ins_encode %{
20856 int vlen_enc = vector_length_encoding(this);
20857 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20858 %}
20859 ins_pipe( pipe_slow );
20860 %}
20861
20862 // ------------------------------ MinMax ---------------------------------------
20863
20864 // Byte, Short, Int vector Min/Max
20865 instruct minmax_reg_sse(vec dst, vec src) %{
20866 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20867 UseAVX == 0);
20868 match(Set dst (MinV dst src));
20869 match(Set dst (MaxV dst src));
20870 format %{ "vector_minmax $dst,$src\t! " %}
20871 ins_encode %{
20872 assert(UseSSE >= 4, "required");
20873
20874 int opcode = this->ideal_Opcode();
20875 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20876 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20877 %}
20878 ins_pipe( pipe_slow );
20879 %}
20880
20881 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20882 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20883 UseAVX > 0);
20884 match(Set dst (MinV src1 src2));
20885 match(Set dst (MaxV src1 src2));
20886 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20887 ins_encode %{
20888 int opcode = this->ideal_Opcode();
20889 int vlen_enc = vector_length_encoding(this);
20890 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20891
20892 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20893 %}
20894 ins_pipe( pipe_slow );
20895 %}
20896
20897 // Long vector Min/Max
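// Note: the SSE form below pins its temporary to xmm0 (rxmm0), most likely because the
// SSE4.1 variable-blend instructions (blendvpd/pblendvb) use xmm0 as their implicit
// selector operand.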
20898 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20899 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20900 UseAVX == 0);
20901 match(Set dst (MinV dst src));
20902 match(Set dst (MaxV src dst));
20903 effect(TEMP dst, TEMP tmp);
20904 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20905 ins_encode %{
20906 assert(UseSSE >= 4, "required");
20907
20908 int opcode = this->ideal_Opcode();
20909 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20910 assert(elem_bt == T_LONG, "sanity");
20911
20912 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20913 %}
20914 ins_pipe( pipe_slow );
20915 %}
20916
20917 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20918 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20919 UseAVX > 0 && !VM_Version::supports_avx512vl());
20920 match(Set dst (MinV src1 src2));
20921 match(Set dst (MaxV src1 src2));
20922 effect(TEMP dst);
20923 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20924 ins_encode %{
20925 int vlen_enc = vector_length_encoding(this);
20926 int opcode = this->ideal_Opcode();
20927 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20928 assert(elem_bt == T_LONG, "sanity");
20929
20930 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20931 %}
20932 ins_pipe( pipe_slow );
20933 %}
20934
20935 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20936 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20937 Matcher::vector_element_basic_type(n) == T_LONG);
20938 match(Set dst (MinV src1 src2));
20939 match(Set dst (MaxV src1 src2));
20940 format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
20941 ins_encode %{
20942 assert(UseAVX > 2, "required");
20943
20944 int vlen_enc = vector_length_encoding(this);
20945 int opcode = this->ideal_Opcode();
20946 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20947 assert(elem_bt == T_LONG, "sanity");
20948
20949 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20950 %}
20951 ins_pipe( pipe_slow );
20952 %}
20953
20954 // Float/Double vector Min/Max
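// Java's Math.min/max semantics differ from plain vminps/vmaxps (NaN must propagate and
// -0.0 orders below +0.0), so the pre-AVX10.2 patterns further below build the result
// with compare/blend sequences and need temporaries. AVX10.2 adds min/max instructions
// whose semantics match, which is why the first pattern maps directly to vminmax_fp
// without temporaries.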
20955 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20956 predicate(VM_Version::supports_avx10_2() &&
20957 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20958 match(Set dst (MinV a b));
20959 match(Set dst (MaxV a b));
20960 format %{ "vector_minmaxFP $dst, $a, $b" %}
20961 ins_encode %{
20962 int vlen_enc = vector_length_encoding(this);
20963 int opcode = this->ideal_Opcode();
20964 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20965 __ vminmax_fp(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20966 %}
20967 ins_pipe( pipe_slow );
20968 %}
20969
20970 // Float/Double vector Min/Max
20971 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20972 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20973 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20974 UseAVX > 0);
20975 match(Set dst (MinV a b));
20976 match(Set dst (MaxV a b));
20977 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20978 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20979 ins_encode %{
20980 assert(UseAVX > 0, "required");
20981
20982 int opcode = this->ideal_Opcode();
20983 int vlen_enc = vector_length_encoding(this);
20984 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20985
20986 __ vminmax_fp(opcode, elem_bt,
20987 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20988 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20989 %}
20990 ins_pipe( pipe_slow );
20991 %}
20992
20993 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20994 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20995 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20996 match(Set dst (MinV a b));
20997 match(Set dst (MaxV a b));
20998 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
20999 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
21000 ins_encode %{
21001 assert(UseAVX > 2, "required");
21002
21003 int opcode = this->ideal_Opcode();
21004 int vlen_enc = vector_length_encoding(this);
21005 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21006
21007 __ evminmax_fp(opcode, elem_bt,
21008 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21009 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21010 %}
21011 ins_pipe( pipe_slow );
21012 %}
21013
21014 // ------------------------------ Unsigned vector Min/Max ----------------------
21015
21016 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21017 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21018 match(Set dst (UMinV a b));
21019 match(Set dst (UMaxV a b));
21020 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21021 ins_encode %{
21022 int opcode = this->ideal_Opcode();
21023 int vlen_enc = vector_length_encoding(this);
21024 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21025 assert(is_integral_type(elem_bt), "");
21026 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21027 %}
21028 ins_pipe( pipe_slow );
21029 %}
21030
21031 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21032 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21033 match(Set dst (UMinV a (LoadVector b)));
21034 match(Set dst (UMaxV a (LoadVector b)));
21035 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21036 ins_encode %{
21037 int opcode = this->ideal_Opcode();
21038 int vlen_enc = vector_length_encoding(this);
21039 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21040 assert(is_integral_type(elem_bt), "");
21041 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21042 %}
21043 ins_pipe( pipe_slow );
21044 %}
21045
21046 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21047 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21048 match(Set dst (UMinV a b));
21049 match(Set dst (UMaxV a b));
21050 effect(TEMP xtmp1, TEMP xtmp2);
21051 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
21052 ins_encode %{
21053 int opcode = this->ideal_Opcode();
21054 int vlen_enc = vector_length_encoding(this);
21055 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21056 %}
21057 ins_pipe( pipe_slow );
21058 %}
21059
21060 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21061 match(Set dst (UMinV (Binary dst src2) mask));
21062 match(Set dst (UMaxV (Binary dst src2) mask));
21063 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21064 ins_encode %{
21065 int vlen_enc = vector_length_encoding(this);
21066 BasicType bt = Matcher::vector_element_basic_type(this);
21067 int opc = this->ideal_Opcode();
21068 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21069 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21070 %}
21071 ins_pipe( pipe_slow );
21072 %}
21073
21074 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21075 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21076 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21077 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21078 ins_encode %{
21079 int vlen_enc = vector_length_encoding(this);
21080 BasicType bt = Matcher::vector_element_basic_type(this);
21081 int opc = this->ideal_Opcode();
21082 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21083 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21084 %}
21085 ins_pipe( pipe_slow );
21086 %}
21087
21088 // --------------------------------- Signum/CopySign ---------------------------
21089
21090 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21091 match(Set dst (SignumF dst (Binary zero one)));
21092 effect(KILL cr);
21093 format %{ "signumF $dst, $dst" %}
21094 ins_encode %{
21095 int opcode = this->ideal_Opcode();
21096 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21097 %}
21098 ins_pipe( pipe_slow );
21099 %}
21100
21101 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21102 match(Set dst (SignumD dst (Binary zero one)));
21103 effect(KILL cr);
21104 format %{ "signumD $dst, $dst" %}
21105 ins_encode %{
21106 int opcode = this->ideal_Opcode();
21107 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21108 %}
21109 ins_pipe( pipe_slow );
21110 %}
21111
21112 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21113 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21114 match(Set dst (SignumVF src (Binary zero one)));
21115 match(Set dst (SignumVD src (Binary zero one)));
21116 effect(TEMP dst, TEMP xtmp1);
21117 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21118 ins_encode %{
21119 int opcode = this->ideal_Opcode();
21120 int vec_enc = vector_length_encoding(this);
21121 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21122 $xtmp1$$XMMRegister, vec_enc);
21123 %}
21124 ins_pipe( pipe_slow );
21125 %}
21126
21127 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21128 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21129 match(Set dst (SignumVF src (Binary zero one)));
21130 match(Set dst (SignumVD src (Binary zero one)));
21131 effect(TEMP dst, TEMP ktmp1);
21132 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21133 ins_encode %{
21134 int opcode = this->ideal_Opcode();
21135 int vec_enc = vector_length_encoding(this);
21136 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21137 $ktmp1$$KRegister, vec_enc);
21138 %}
21139 ins_pipe( pipe_slow );
21140 %}
21141
21142 // ---------------------------------------
21143 // For copySign use 0xE4 as writemask for vpternlog
21144 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21145 // C (xmm2) is set to 0x7FFFFFFF
21146 // Wherever xmm2 is 0, we want to pick from B (sign)
21147 // Wherever xmm2 is 1, we want to pick from A (src)
21148 //
21149 // A B C Result
21150 // 0 0 0 0
21151 // 0 0 1 0
21152 // 0 1 0 1
21153 // 0 1 1 0
21154 // 1 0 0 0
21155 // 1 0 1 1
21156 // 1 1 0 1
21157 // 1 1 1 1
21158 //
// Result going from high bit to low bit is binary 11100100 = 0xE4
21160 // ---------------------------------------
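// As a scalar sketch of the same bit-select (illustrative only, not emitted code):
//   uint32_t mask   = 0x7FFFFFFF;                        // C operand of vpternlog
//   uint32_t result = (A_bits & mask) | (B_bits & ~mask); // keep A's magnitude, B's sign bit
// which is exactly the bitwise function selected by the 0xE4 immediate.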
21161
21162 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21163 match(Set dst (CopySignF dst src));
21164 effect(TEMP tmp1, TEMP tmp2);
21165 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21166 ins_encode %{
21167 __ movl($tmp2$$Register, 0x7FFFFFFF);
21168 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
21169 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21170 %}
21171 ins_pipe( pipe_slow );
21172 %}
21173
21174 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21175 match(Set dst (CopySignD dst (Binary src zero)));
21176 ins_cost(100);
21177 effect(TEMP tmp1, TEMP tmp2);
21178 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21179 ins_encode %{
21180 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21181 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
21182 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21183 %}
21184 ins_pipe( pipe_slow );
21185 %}
21186
21187 //----------------------------- CompressBits/ExpandBits ------------------------
21188
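// CompressBits maps to BMI2 pext (gather the src bits selected by mask into the low bits)
// and ExpandBits maps to pdep (scatter the low bits of src to the mask's set positions).
// For example, with mask = 0x0000FF00:
//   pext(0x12345678, 0x0000FF00) == 0x00000056   // Integer.compress
//   pdep(0x00000056, 0x0000FF00) == 0x00005600   // Integer.expand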
21189 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21190 predicate(n->bottom_type()->isa_int());
21191 match(Set dst (CompressBits src mask));
21192 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21193 ins_encode %{
21194 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21195 %}
21196 ins_pipe( pipe_slow );
21197 %}
21198
21199 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21200 predicate(n->bottom_type()->isa_int());
21201 match(Set dst (ExpandBits src mask));
21202 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21203 ins_encode %{
21204 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21205 %}
21206 ins_pipe( pipe_slow );
21207 %}
21208
21209 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21210 predicate(n->bottom_type()->isa_int());
21211 match(Set dst (CompressBits src (LoadI mask)));
21212 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21213 ins_encode %{
21214 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21215 %}
21216 ins_pipe( pipe_slow );
21217 %}
21218
21219 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21220 predicate(n->bottom_type()->isa_int());
21221 match(Set dst (ExpandBits src (LoadI mask)));
21222 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21223 ins_encode %{
21224 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21225 %}
21226 ins_pipe( pipe_slow );
21227 %}
21228
21229 // --------------------------------- Sqrt --------------------------------------
21230
21231 instruct vsqrtF_reg(vec dst, vec src) %{
21232 match(Set dst (SqrtVF src));
21233 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21234 ins_encode %{
21235 assert(UseAVX > 0, "required");
21236 int vlen_enc = vector_length_encoding(this);
21237 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21238 %}
21239 ins_pipe( pipe_slow );
21240 %}
21241
21242 instruct vsqrtF_mem(vec dst, memory mem) %{
21243 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21244 match(Set dst (SqrtVF (LoadVector mem)));
21245 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21246 ins_encode %{
21247 assert(UseAVX > 0, "required");
21248 int vlen_enc = vector_length_encoding(this);
21249 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21250 %}
21251 ins_pipe( pipe_slow );
21252 %}
21253
21254 // Floating point vector sqrt
21255 instruct vsqrtD_reg(vec dst, vec src) %{
21256 match(Set dst (SqrtVD src));
21257 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
21258 ins_encode %{
21259 assert(UseAVX > 0, "required");
21260 int vlen_enc = vector_length_encoding(this);
21261 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21262 %}
21263 ins_pipe( pipe_slow );
21264 %}
21265
21266 instruct vsqrtD_mem(vec dst, memory mem) %{
21267 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21268 match(Set dst (SqrtVD (LoadVector mem)));
21269 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
21270 ins_encode %{
21271 assert(UseAVX > 0, "required");
21272 int vlen_enc = vector_length_encoding(this);
21273 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
21274 %}
21275 ins_pipe( pipe_slow );
21276 %}
21277
21278 // ------------------------------ Shift ---------------------------------------
21279
21280 // Left and right shift count vectors are the same on x86
21281 // (only lowest bits of xmm reg are used for count).
21282 instruct vshiftcnt(vec dst, rRegI cnt) %{
21283 match(Set dst (LShiftCntV cnt));
21284 match(Set dst (RShiftCntV cnt));
21285 format %{ "movdl $dst,$cnt\t! load shift count" %}
21286 ins_encode %{
21287 __ movdl($dst$$XMMRegister, $cnt$$Register);
21288 %}
21289 ins_pipe( pipe_slow );
21290 %}
21291
21292 // Byte vector shift
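// x86 has no byte-granular shift instructions, so byte shifts are emulated below: the
// byte lanes are sign- or zero-extended to 16-bit words, shifted as words, masked back
// to 8 significant bits, and re-packed into byte lanes.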
21293 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
21294 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
21295 match(Set dst ( LShiftVB src shift));
21296 match(Set dst ( RShiftVB src shift));
21297 match(Set dst (URShiftVB src shift));
21298 effect(TEMP dst, USE src, USE shift, TEMP tmp);
21299 format %{"vector_byte_shift $dst,$src,$shift" %}
21300 ins_encode %{
21301 assert(UseSSE > 3, "required");
21302 int opcode = this->ideal_Opcode();
21303 bool sign = (opcode != Op_URShiftVB);
21304 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
21305 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
21306 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21307 __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
21308 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
21309 %}
21310 ins_pipe( pipe_slow );
21311 %}
21312
21313 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21314 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21315 UseAVX <= 1);
21316 match(Set dst ( LShiftVB src shift));
21317 match(Set dst ( RShiftVB src shift));
21318 match(Set dst (URShiftVB src shift));
21319 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
21320 format %{"vector_byte_shift $dst,$src,$shift" %}
21321 ins_encode %{
21322 assert(UseSSE > 3, "required");
21323 int opcode = this->ideal_Opcode();
21324 bool sign = (opcode != Op_URShiftVB);
21325 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
21326 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
21327 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
21328 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
21329 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
21330 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21331 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
21332 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
21333 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
21334 %}
21335 ins_pipe( pipe_slow );
21336 %}
21337
21338 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
21339 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
21340 UseAVX > 1);
21341 match(Set dst ( LShiftVB src shift));
21342 match(Set dst ( RShiftVB src shift));
21343 match(Set dst (URShiftVB src shift));
21344 effect(TEMP dst, TEMP tmp);
21345 format %{"vector_byte_shift $dst,$src,$shift" %}
21346 ins_encode %{
21347 int opcode = this->ideal_Opcode();
21348 bool sign = (opcode != Op_URShiftVB);
21349 int vlen_enc = Assembler::AVX_256bit;
21350 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
21351 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21352 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21353 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
21354 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
21355 %}
21356 ins_pipe( pipe_slow );
21357 %}
21358
21359 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
21360 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
21361 match(Set dst ( LShiftVB src shift));
21362 match(Set dst ( RShiftVB src shift));
21363 match(Set dst (URShiftVB src shift));
21364 effect(TEMP dst, TEMP tmp);
21365 format %{"vector_byte_shift $dst,$src,$shift" %}
21366 ins_encode %{
21367 assert(UseAVX > 1, "required");
21368 int opcode = this->ideal_Opcode();
21369 bool sign = (opcode != Op_URShiftVB);
21370 int vlen_enc = Assembler::AVX_256bit;
21371 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
21372 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21373 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21374 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21375 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21376 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21377 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
21378 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
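    // vpackuswb packs within each 128-bit lane, which interleaves the two halves; the
    // cross-lane qword permute below (0xD8 = lane order 0,2,1,3) restores element order.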
21379 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21380 %}
21381 ins_pipe( pipe_slow );
21382 %}
21383
21384 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
21385 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
21386 match(Set dst ( LShiftVB src shift));
21387 match(Set dst (RShiftVB src shift));
21388 match(Set dst (URShiftVB src shift));
21389 effect(TEMP dst, TEMP tmp1, TEMP tmp2);
21390 format %{"vector_byte_shift $dst,$src,$shift" %}
21391 ins_encode %{
21392 assert(UseAVX > 2, "required");
21393 int opcode = this->ideal_Opcode();
21394 bool sign = (opcode != Op_URShiftVB);
21395 int vlen_enc = Assembler::AVX_512bit;
21396 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
21397 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
21398 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21399 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21400 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21401 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
21402 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21403 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21404 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21405 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
21406 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
21407 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21408 %}
21409 ins_pipe( pipe_slow );
21410 %}
21411
21412 // A shorts vector logical right shift produces an incorrect Java result
21413 // for negative data because Java code converts short values to int with
21414 // sign extension before shifting. Char vectors are fine since chars are
21415 // unsigned values.
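// For example, with a short element of -1 and a logical shift by 2, Java computes
// ((int)0xFFFFFFFF) >>> 2 = 0x3FFFFFFF, which narrows back to (short)0xFFFF, whereas a
// packed 16-bit logical shift (psrlw) would produce 0x3FFF. With char data the zero
// extension makes the scalar and packed results agree.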
21416 // Shorts/Chars vector shift
21417 instruct vshiftS(vec dst, vec src, vec shift) %{
21418 predicate(!n->as_ShiftV()->is_var_shift());
21419 match(Set dst ( LShiftVS src shift));
21420 match(Set dst ( RShiftVS src shift));
21421 match(Set dst (URShiftVS src shift));
21422 effect(TEMP dst, USE src, USE shift);
21423 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
21424 ins_encode %{
21425 int opcode = this->ideal_Opcode();
21426 if (UseAVX > 0) {
21427 int vlen_enc = vector_length_encoding(this);
21428 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21429 } else {
21430 int vlen = Matcher::vector_length(this);
21431 if (vlen == 2) {
21432 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21433 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21434 } else if (vlen == 4) {
21435 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21436 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21437 } else {
21438 assert (vlen == 8, "sanity");
21439 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21440 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21441 }
21442 }
21443 %}
21444 ins_pipe( pipe_slow );
21445 %}
21446
21447 // Integers vector shift
21448 instruct vshiftI(vec dst, vec src, vec shift) %{
21449 predicate(!n->as_ShiftV()->is_var_shift());
21450 match(Set dst ( LShiftVI src shift));
21451 match(Set dst ( RShiftVI src shift));
21452 match(Set dst (URShiftVI src shift));
21453 effect(TEMP dst, USE src, USE shift);
21454 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
21455 ins_encode %{
21456 int opcode = this->ideal_Opcode();
21457 if (UseAVX > 0) {
21458 int vlen_enc = vector_length_encoding(this);
21459 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21460 } else {
21461 int vlen = Matcher::vector_length(this);
21462 if (vlen == 2) {
21463 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21464 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21465 } else {
21466 assert(vlen == 4, "sanity");
21467 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21468 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21469 }
21470 }
21471 %}
21472 ins_pipe( pipe_slow );
21473 %}
21474
21475 // Integers vector constant shift
21476 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
21477 match(Set dst (LShiftVI src (LShiftCntV shift)));
21478 match(Set dst (RShiftVI src (RShiftCntV shift)));
21479 match(Set dst (URShiftVI src (RShiftCntV shift)));
21480 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
21481 ins_encode %{
21482 int opcode = this->ideal_Opcode();
21483 if (UseAVX > 0) {
21484 int vector_len = vector_length_encoding(this);
21485 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21486 } else {
21487 int vlen = Matcher::vector_length(this);
21488 if (vlen == 2) {
21489 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
21490 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21491 } else {
21492 assert(vlen == 4, "sanity");
21493 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21494 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21495 }
21496 }
21497 %}
21498 ins_pipe( pipe_slow );
21499 %}
21500
21501 // Longs vector shift
21502 instruct vshiftL(vec dst, vec src, vec shift) %{
21503 predicate(!n->as_ShiftV()->is_var_shift());
21504 match(Set dst ( LShiftVL src shift));
21505 match(Set dst (URShiftVL src shift));
21506 effect(TEMP dst, USE src, USE shift);
21507 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
21508 ins_encode %{
21509 int opcode = this->ideal_Opcode();
21510 if (UseAVX > 0) {
21511 int vlen_enc = vector_length_encoding(this);
21512 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21513 } else {
21514 assert(Matcher::vector_length(this) == 2, "");
21515 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21516 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
21517 }
21518 %}
21519 ins_pipe( pipe_slow );
21520 %}
21521
21522 // Longs vector constant shift
21523 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
21524 match(Set dst (LShiftVL src (LShiftCntV shift)));
21525 match(Set dst (URShiftVL src (RShiftCntV shift)));
21526 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
21527 ins_encode %{
21528 int opcode = this->ideal_Opcode();
21529 if (UseAVX > 0) {
21530 int vector_len = vector_length_encoding(this);
21531 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
21532 } else {
21533 assert(Matcher::vector_length(this) == 2, "");
21534 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21535 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
21536 }
21537 %}
21538 ins_pipe( pipe_slow );
21539 %}
21540
21541 // -------------------ArithmeticRightShift -----------------------------------
21542 // Long vector arithmetic right shift
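// Below AVX-512 there is no packed 64-bit arithmetic right shift (vpsraq), so it is
// emulated as ((x >>> s) ^ m) - m, where m = (0x8000000000000000 >>> s).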
21543 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
21544 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
21545 match(Set dst (RShiftVL src shift));
21546 effect(TEMP dst, TEMP tmp);
21547 format %{ "vshiftq $dst,$src,$shift" %}
21548 ins_encode %{
21549 uint vlen = Matcher::vector_length(this);
21550 if (vlen == 2) {
21551 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
21552 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
21553 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21554 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
21555 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
21556 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
21557 } else {
21558 assert(vlen == 4, "sanity");
21559 assert(UseAVX > 1, "required");
21560 int vlen_enc = Assembler::AVX_256bit;
21561 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21562 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
21563 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21564 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21565 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
21566 }
21567 %}
21568 ins_pipe( pipe_slow );
21569 %}
21570
21571 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
21572 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
21573 match(Set dst (RShiftVL src shift));
21574 format %{ "vshiftq $dst,$src,$shift" %}
21575 ins_encode %{
21576 int vlen_enc = vector_length_encoding(this);
21577 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21578 %}
21579 ins_pipe( pipe_slow );
21580 %}
21581
21582 // ------------------- Variable Shift -----------------------------
21583 // Byte variable shift
21584 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21585 predicate(Matcher::vector_length(n) <= 8 &&
21586 n->as_ShiftV()->is_var_shift() &&
21587 !VM_Version::supports_avx512bw());
21588 match(Set dst ( LShiftVB src shift));
21589 match(Set dst ( RShiftVB src shift));
21590 match(Set dst (URShiftVB src shift));
21591 effect(TEMP dst, TEMP vtmp);
21592 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21593 ins_encode %{
21594 assert(UseAVX >= 2, "required");
21595
21596 int opcode = this->ideal_Opcode();
21597 int vlen_enc = Assembler::AVX_128bit;
21598 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21599 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21600 %}
21601 ins_pipe( pipe_slow );
21602 %}
21603
21604 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21605 predicate(Matcher::vector_length(n) == 16 &&
21606 n->as_ShiftV()->is_var_shift() &&
21607 !VM_Version::supports_avx512bw());
21608 match(Set dst ( LShiftVB src shift));
21609 match(Set dst ( RShiftVB src shift));
21610 match(Set dst (URShiftVB src shift));
21611 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21612 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21613 ins_encode %{
21614 assert(UseAVX >= 2, "required");
21615
21616 int opcode = this->ideal_Opcode();
21617 int vlen_enc = Assembler::AVX_128bit;
21618 // Shift lower half and get word result in dst
21619 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21620
21621 // Shift upper half and get word result in vtmp1
21622 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21623 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21624 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21625
21626 // Merge and down convert the two word results to byte in dst
21627 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21628 %}
21629 ins_pipe( pipe_slow );
21630 %}
21631
21632 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21633 predicate(Matcher::vector_length(n) == 32 &&
21634 n->as_ShiftV()->is_var_shift() &&
21635 !VM_Version::supports_avx512bw());
21636 match(Set dst ( LShiftVB src shift));
21637 match(Set dst ( RShiftVB src shift));
21638 match(Set dst (URShiftVB src shift));
21639 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21640 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21641 ins_encode %{
21642 assert(UseAVX >= 2, "required");
21643
21644 int opcode = this->ideal_Opcode();
21645 int vlen_enc = Assembler::AVX_128bit;
21646 // Process lower 128 bits and get result in dst
21647 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21648 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21649 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21650 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21651 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21652
21653 // Process higher 128 bits and get result in vtmp3
21654 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21655 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21656 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21657 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21658 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21659 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21660 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21661
21662 // Merge the two results in dst
21663 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21664 %}
21665 ins_pipe( pipe_slow );
21666 %}
21667
21668 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21669 predicate(Matcher::vector_length(n) <= 32 &&
21670 n->as_ShiftV()->is_var_shift() &&
21671 VM_Version::supports_avx512bw());
21672 match(Set dst ( LShiftVB src shift));
21673 match(Set dst ( RShiftVB src shift));
21674 match(Set dst (URShiftVB src shift));
21675 effect(TEMP dst, TEMP vtmp);
21676 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21677 ins_encode %{
21678 assert(UseAVX > 2, "required");
21679
21680 int opcode = this->ideal_Opcode();
21681 int vlen_enc = vector_length_encoding(this);
21682 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21683 %}
21684 ins_pipe( pipe_slow );
21685 %}
21686
21687 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21688 predicate(Matcher::vector_length(n) == 64 &&
21689 n->as_ShiftV()->is_var_shift() &&
21690 VM_Version::supports_avx512bw());
21691 match(Set dst ( LShiftVB src shift));
21692 match(Set dst ( RShiftVB src shift));
21693 match(Set dst (URShiftVB src shift));
21694 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21695 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21696 ins_encode %{
21697 assert(UseAVX > 2, "required");
21698
21699 int opcode = this->ideal_Opcode();
21700 int vlen_enc = Assembler::AVX_256bit;
21701 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21702 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21703 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21704 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21705 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21706 %}
21707 ins_pipe( pipe_slow );
21708 %}
21709
21710 // Short variable shift
21711 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21712 predicate(Matcher::vector_length(n) <= 8 &&
21713 n->as_ShiftV()->is_var_shift() &&
21714 !VM_Version::supports_avx512bw());
21715 match(Set dst ( LShiftVS src shift));
21716 match(Set dst ( RShiftVS src shift));
21717 match(Set dst (URShiftVS src shift));
21718 effect(TEMP dst, TEMP vtmp);
21719 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21720 ins_encode %{
21721 assert(UseAVX >= 2, "required");
21722
21723 int opcode = this->ideal_Opcode();
21724 bool sign = (opcode != Op_URShiftVS);
21725 int vlen_enc = Assembler::AVX_256bit;
21726 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21727 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21728 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21729 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21730 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21731 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21732 %}
21733 ins_pipe( pipe_slow );
21734 %}
21735
21736 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21737 predicate(Matcher::vector_length(n) == 16 &&
21738 n->as_ShiftV()->is_var_shift() &&
21739 !VM_Version::supports_avx512bw());
21740 match(Set dst ( LShiftVS src shift));
21741 match(Set dst ( RShiftVS src shift));
21742 match(Set dst (URShiftVS src shift));
21743 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21744 format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21745 ins_encode %{
21746 assert(UseAVX >= 2, "required");
21747
21748 int opcode = this->ideal_Opcode();
21749 bool sign = (opcode != Op_URShiftVS);
21750 int vlen_enc = Assembler::AVX_256bit;
21751 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21752 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21753 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21754 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21755 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21756
21757 // Shift upper half, with result in dst using vtmp1 as TEMP
21758 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21759 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21760 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21761 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21762 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21763 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21764
21765 // Merge lower and upper half result into dst
21766 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21767 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21768 %}
21769 ins_pipe( pipe_slow );
21770 %}
21771
21772 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21773 predicate(n->as_ShiftV()->is_var_shift() &&
21774 VM_Version::supports_avx512bw());
21775 match(Set dst ( LShiftVS src shift));
21776 match(Set dst ( RShiftVS src shift));
21777 match(Set dst (URShiftVS src shift));
21778 format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21779 ins_encode %{
21780 assert(UseAVX > 2, "required");
21781
21782 int opcode = this->ideal_Opcode();
21783 int vlen_enc = vector_length_encoding(this);
21784 if (!VM_Version::supports_avx512vl()) {
21785 vlen_enc = Assembler::AVX_512bit;
21786 }
21787 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21788 %}
21789 ins_pipe( pipe_slow );
21790 %}
21791
21792 // Integer variable shift
21793 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21794 predicate(n->as_ShiftV()->is_var_shift());
21795 match(Set dst ( LShiftVI src shift));
21796 match(Set dst ( RShiftVI src shift));
21797 match(Set dst (URShiftVI src shift));
21798 format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21799 ins_encode %{
21800 assert(UseAVX >= 2, "required");
21801
21802 int opcode = this->ideal_Opcode();
21803 int vlen_enc = vector_length_encoding(this);
21804 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21805 %}
21806 ins_pipe( pipe_slow );
21807 %}
21808
21809 // Long variable shift
21810 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21811 predicate(n->as_ShiftV()->is_var_shift());
21812 match(Set dst ( LShiftVL src shift));
21813 match(Set dst (URShiftVL src shift));
21814 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21815 ins_encode %{
21816 assert(UseAVX >= 2, "required");
21817
21818 int opcode = this->ideal_Opcode();
21819 int vlen_enc = vector_length_encoding(this);
21820 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21821 %}
21822 ins_pipe( pipe_slow );
21823 %}
21824
21825 // Long variable arithmetic right shift
21826 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21827 predicate(Matcher::vector_length(n) <= 4 &&
21828 n->as_ShiftV()->is_var_shift() &&
21829 UseAVX == 2);
21830 match(Set dst (RShiftVL src shift));
21831 effect(TEMP dst, TEMP vtmp);
21832 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21833 ins_encode %{
21834 int opcode = this->ideal_Opcode();
21835 int vlen_enc = vector_length_encoding(this);
21836 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21837 $vtmp$$XMMRegister);
21838 %}
21839 ins_pipe( pipe_slow );
21840 %}
21841
21842 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21843 predicate(n->as_ShiftV()->is_var_shift() &&
21844 UseAVX > 2);
21845 match(Set dst (RShiftVL src shift));
21846 format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21847 ins_encode %{
21848 int opcode = this->ideal_Opcode();
21849 int vlen_enc = vector_length_encoding(this);
21850 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21851 %}
21852 ins_pipe( pipe_slow );
21853 %}
21854
21855 // --------------------------------- AND --------------------------------------
21856
21857 instruct vand(vec dst, vec src) %{
21858 predicate(UseAVX == 0);
21859 match(Set dst (AndV dst src));
21860 format %{ "pand $dst,$src\t! and vectors" %}
21861 ins_encode %{
21862 __ pand($dst$$XMMRegister, $src$$XMMRegister);
21863 %}
21864 ins_pipe( pipe_slow );
21865 %}
21866
21867 instruct vand_reg(vec dst, vec src1, vec src2) %{
21868 predicate(UseAVX > 0);
21869 match(Set dst (AndV src1 src2));
21870 format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
21871 ins_encode %{
21872 int vlen_enc = vector_length_encoding(this);
21873 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21874 %}
21875 ins_pipe( pipe_slow );
21876 %}
21877
21878 instruct vand_mem(vec dst, vec src, memory mem) %{
21879 predicate((UseAVX > 0) &&
21880 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21881 match(Set dst (AndV src (LoadVector mem)));
21882 format %{ "vpand $dst,$src,$mem\t! and vectors" %}
21883 ins_encode %{
21884 int vlen_enc = vector_length_encoding(this);
21885 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21886 %}
21887 ins_pipe( pipe_slow );
21888 %}
21889
21890 // --------------------------------- OR ---------------------------------------
21891
21892 instruct vor(vec dst, vec src) %{
21893 predicate(UseAVX == 0);
21894 match(Set dst (OrV dst src));
21895 format %{ "por $dst,$src\t! or vectors" %}
21896 ins_encode %{
21897 __ por($dst$$XMMRegister, $src$$XMMRegister);
21898 %}
21899 ins_pipe( pipe_slow );
21900 %}
21901
21902 instruct vor_reg(vec dst, vec src1, vec src2) %{
21903 predicate(UseAVX > 0);
21904 match(Set dst (OrV src1 src2));
21905 format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
21906 ins_encode %{
21907 int vlen_enc = vector_length_encoding(this);
21908 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21909 %}
21910 ins_pipe( pipe_slow );
21911 %}
21912
21913 instruct vor_mem(vec dst, vec src, memory mem) %{
21914 predicate((UseAVX > 0) &&
21915 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21916 match(Set dst (OrV src (LoadVector mem)));
21917 format %{ "vpor $dst,$src,$mem\t! or vectors" %}
21918 ins_encode %{
21919 int vlen_enc = vector_length_encoding(this);
21920 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21921 %}
21922 ins_pipe( pipe_slow );
21923 %}
21924
21925 // --------------------------------- XOR --------------------------------------
21926
21927 instruct vxor(vec dst, vec src) %{
21928 predicate(UseAVX == 0);
21929 match(Set dst (XorV dst src));
21930 format %{ "pxor $dst,$src\t! xor vectors" %}
21931 ins_encode %{
21932 __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21933 %}
21934 ins_pipe( pipe_slow );
21935 %}
21936
21937 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21938 predicate(UseAVX > 0);
21939 match(Set dst (XorV src1 src2));
21940 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
21941 ins_encode %{
21942 int vlen_enc = vector_length_encoding(this);
21943 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21944 %}
21945 ins_pipe( pipe_slow );
21946 %}
21947
21948 instruct vxor_mem(vec dst, vec src, memory mem) %{
21949 predicate((UseAVX > 0) &&
21950 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21951 match(Set dst (XorV src (LoadVector mem)));
21952 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
21953 ins_encode %{
21954 int vlen_enc = vector_length_encoding(this);
21955 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21956 %}
21957 ins_pipe( pipe_slow );
21958 %}
21959
21960 // --------------------------------- VectorCast --------------------------------------
21961
21962 instruct vcastBtoX(vec dst, vec src) %{
21963 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21964 match(Set dst (VectorCastB2X src));
21965 format %{ "vector_cast_b2x $dst,$src\t!" %}
21966 ins_encode %{
21967 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21968 int vlen_enc = vector_length_encoding(this);
21969 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21970 %}
21971 ins_pipe( pipe_slow );
21972 %}
21973
21974 instruct vcastBtoD(legVec dst, legVec src) %{
21975 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21976 match(Set dst (VectorCastB2X src));
21977 format %{ "vector_cast_b2x $dst,$src\t!" %}
21978 ins_encode %{
21979 int vlen_enc = vector_length_encoding(this);
21980 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21981 %}
21982 ins_pipe( pipe_slow );
21983 %}
21984
21985 instruct castStoX(vec dst, vec src) %{
21986 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21987 Matcher::vector_length(n->in(1)) <= 8 && // src
21988 Matcher::vector_element_basic_type(n) == T_BYTE);
21989 match(Set dst (VectorCastS2X src));
21990 format %{ "vector_cast_s2x $dst,$src" %}
21991 ins_encode %{
21992 assert(UseAVX > 0, "required");
21993
21994 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21995 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21996 %}
21997 ins_pipe( pipe_slow );
21998 %}
21999
22000 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22001 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22002 Matcher::vector_length(n->in(1)) == 16 && // src
22003 Matcher::vector_element_basic_type(n) == T_BYTE);
22004 effect(TEMP dst, TEMP vtmp);
22005 match(Set dst (VectorCastS2X src));
22006 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22007 ins_encode %{
22008 assert(UseAVX > 0, "required");
22009
22010 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22011 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22012 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22013 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22014 %}
22015 ins_pipe( pipe_slow );
22016 %}
22017
22018 instruct vcastStoX_evex(vec dst, vec src) %{
22019 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22020 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22021 match(Set dst (VectorCastS2X src));
22022 format %{ "vector_cast_s2x $dst,$src\t!" %}
22023 ins_encode %{
22024 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22025 int src_vlen_enc = vector_length_encoding(this, $src);
22026 int vlen_enc = vector_length_encoding(this);
22027 switch (to_elem_bt) {
22028 case T_BYTE:
22029 if (!VM_Version::supports_avx512vl()) {
22030 vlen_enc = Assembler::AVX_512bit;
22031 }
22032 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22033 break;
22034 case T_INT:
22035 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22036 break;
22037 case T_FLOAT:
22038 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22039 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22040 break;
22041 case T_LONG:
22042 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22043 break;
22044 case T_DOUBLE: {
22045 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22046 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22047 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22048 break;
22049 }
22050 default:
22051 ShouldNotReachHere();
22052 }
22053 %}
22054 ins_pipe( pipe_slow );
22055 %}
22056
22057 instruct castItoX(vec dst, vec src) %{
22058 predicate(UseAVX <= 2 &&
22059 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22060 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22061 match(Set dst (VectorCastI2X src));
22062 format %{ "vector_cast_i2x $dst,$src" %}
22063 ins_encode %{
22064 assert(UseAVX > 0, "required");
22065
22066 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22067 int vlen_enc = vector_length_encoding(this, $src);
22068
22069 if (to_elem_bt == T_BYTE) {
22070 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22071 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22072 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22073 } else {
22074 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22075 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22076 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22077 }
22078 %}
22079 ins_pipe( pipe_slow );
22080 %}
22081
22082 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22083 predicate(UseAVX <= 2 &&
22084 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22085 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22086 match(Set dst (VectorCastI2X src));
22087 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22088 effect(TEMP dst, TEMP vtmp);
22089 ins_encode %{
22090 assert(UseAVX > 0, "required");
22091
22092 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22093 int vlen_enc = vector_length_encoding(this, $src);
22094
22095 if (to_elem_bt == T_BYTE) {
22096 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22097 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22098 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22099 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22100 } else {
22101 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22102 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22103 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22104 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22105 }
22106 %}
22107 ins_pipe( pipe_slow );
22108 %}
22109
22110 instruct vcastItoX_evex(vec dst, vec src) %{
22111 predicate(UseAVX > 2 ||
22112 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22113 match(Set dst (VectorCastI2X src));
22114 format %{ "vector_cast_i2x $dst,$src\t!" %}
22115 ins_encode %{
22116 assert(UseAVX > 0, "required");
22117
22118 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22119 int src_vlen_enc = vector_length_encoding(this, $src);
22120 int dst_vlen_enc = vector_length_encoding(this);
22121 switch (dst_elem_bt) {
22122 case T_BYTE:
22123 if (!VM_Version::supports_avx512vl()) {
22124 src_vlen_enc = Assembler::AVX_512bit;
22125 }
22126 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22127 break;
22128 case T_SHORT:
22129 if (!VM_Version::supports_avx512vl()) {
22130 src_vlen_enc = Assembler::AVX_512bit;
22131 }
22132 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22133 break;
22134 case T_FLOAT:
22135 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22136 break;
22137 case T_LONG:
22138 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22139 break;
22140 case T_DOUBLE:
22141 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22142 break;
22143 default:
22144 ShouldNotReachHere();
22145 }
22146 %}
22147 ins_pipe( pipe_slow );
22148 %}
22149
22150 instruct vcastLtoBS(vec dst, vec src) %{
22151 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22152 UseAVX <= 2);
22153 match(Set dst (VectorCastL2X src));
22154 format %{ "vector_cast_l2x $dst,$src" %}
22155 ins_encode %{
22156 assert(UseAVX > 0, "required");
22157
22158 int vlen = Matcher::vector_length_in_bytes(this, $src);
22159 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22160 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22161 : ExternalAddress(vector_int_to_short_mask());
22162 if (vlen <= 16) {
22163 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22164 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22165 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22166 } else {
22167 assert(vlen <= 32, "required");
22168 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22169 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22170 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22171 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22172 }
22173 if (to_elem_bt == T_BYTE) {
22174 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22175 }
22176 %}
22177 ins_pipe( pipe_slow );
22178 %}
22179
22180 instruct vcastLtoX_evex(vec dst, vec src) %{
22181 predicate(UseAVX > 2 ||
22182 (Matcher::vector_element_basic_type(n) == T_INT ||
22183 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22184 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22185 match(Set dst (VectorCastL2X src));
22186 format %{ "vector_cast_l2x $dst,$src\t!" %}
22187 ins_encode %{
22188 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22189 int vlen = Matcher::vector_length_in_bytes(this, $src);
22190 int vlen_enc = vector_length_encoding(this, $src);
22191 switch (to_elem_bt) {
22192 case T_BYTE:
22193 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22194 vlen_enc = Assembler::AVX_512bit;
22195 }
22196 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22197 break;
22198 case T_SHORT:
22199 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22200 vlen_enc = Assembler::AVX_512bit;
22201 }
22202 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22203 break;
22204 case T_INT:
22205 if (vlen == 8) {
22206 if ($dst$$XMMRegister != $src$$XMMRegister) {
22207 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22208 }
22209 } else if (vlen == 16) {
22210 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22211 } else if (vlen == 32) {
22212 if (UseAVX > 2) {
22213 if (!VM_Version::supports_avx512vl()) {
22214 vlen_enc = Assembler::AVX_512bit;
22215 }
22216 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22217 } else {
22218 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22219 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22220 }
22221 } else { // vlen == 64
22222 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22223 }
22224 break;
22225 case T_FLOAT:
22226 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22227 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22228 break;
22229 case T_DOUBLE:
22230 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22231 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22232 break;
22233
22234 default: assert(false, "%s", type2name(to_elem_bt));
22235 }
22236 %}
22237 ins_pipe( pipe_slow );
22238 %}
22239
22240 instruct vcastFtoD_reg(vec dst, vec src) %{
22241 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22242 match(Set dst (VectorCastF2X src));
22243 format %{ "vector_cast_f2d $dst,$src\t!" %}
22244 ins_encode %{
22245 int vlen_enc = vector_length_encoding(this);
22246 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22247 %}
22248 ins_pipe( pipe_slow );
22249 %}
22250
22251
22252 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22253 predicate(!VM_Version::supports_avx10_2() &&
22254 !VM_Version::supports_avx512vl() &&
22255 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22256 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22257 is_integral_type(Matcher::vector_element_basic_type(n)));
22258 match(Set dst (VectorCastF2X src));
22259 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22260 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22261 ins_encode %{
22262 int vlen_enc = vector_length_encoding(this, $src);
22263 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22264 // JDK-8292878 removed the explicit scratch register that was needed to load
22265 // addresses wider than 32 bits for register-indirect addressing, since stub
22266 // constants live in the code cache and ReservedCodeCacheSize is currently capped at 2G.
22267 // Targets are free to raise this limit, but a code cache larger than 2G looks
22268 // unreasonable in practice. On the flip side, with the given cap we save a
22269 // temporary register allocation, which in the limiting case can prevent
22270 // spilling in blocks with high register pressure.
22271 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22272 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22273 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22274 %}
22275 ins_pipe( pipe_slow );
22276 %}
22277
22278 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22279 predicate(!VM_Version::supports_avx10_2() &&
22280 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22281 is_integral_type(Matcher::vector_element_basic_type(n)));
22282 match(Set dst (VectorCastF2X src));
22283 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22284 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22285 ins_encode %{
22286 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22287 if (to_elem_bt == T_LONG) {
22288 int vlen_enc = vector_length_encoding(this);
22289 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22290 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22291 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22292 } else {
22293 int vlen_enc = vector_length_encoding(this, $src);
22294 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22295 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22296 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22297 }
22298 %}
22299 ins_pipe( pipe_slow );
22300 %}
22301
22302 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22303 predicate(VM_Version::supports_avx10_2() &&
22304 is_integral_type(Matcher::vector_element_basic_type(n)));
22305 match(Set dst (VectorCastF2X src));
22306 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22307 ins_encode %{
22308 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22309 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22310 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22311 %}
22312 ins_pipe( pipe_slow );
22313 %}
22314
22315 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22316 predicate(VM_Version::supports_avx10_2() &&
22317 is_integral_type(Matcher::vector_element_basic_type(n)));
22318 match(Set dst (VectorCastF2X (LoadVector src)));
22319 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22320 ins_encode %{
22321 int vlen = Matcher::vector_length(this);
22322 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22323 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22324 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22325 %}
22326 ins_pipe( pipe_slow );
22327 %}
22328
22329 instruct vcastDtoF_reg(vec dst, vec src) %{
22330 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22331 match(Set dst (VectorCastD2X src));
22332 format %{ "vector_cast_d2x $dst,$src\t!" %}
22333 ins_encode %{
22334 int vlen_enc = vector_length_encoding(this, $src);
22335 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22336 %}
22337 ins_pipe( pipe_slow );
22338 %}
22339
22340 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22341 predicate(!VM_Version::supports_avx10_2() &&
22342 !VM_Version::supports_avx512vl() &&
22343 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22344 is_integral_type(Matcher::vector_element_basic_type(n)));
22345 match(Set dst (VectorCastD2X src));
22346 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22347 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22348 ins_encode %{
22349 int vlen_enc = vector_length_encoding(this, $src);
22350 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22351 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22352 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22353 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22354 %}
22355 ins_pipe( pipe_slow );
22356 %}
22357
22358 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22359 predicate(!VM_Version::supports_avx10_2() &&
22360 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22361 is_integral_type(Matcher::vector_element_basic_type(n)));
22362 match(Set dst (VectorCastD2X src));
22363 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22364 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22365 ins_encode %{
22366 int vlen_enc = vector_length_encoding(this, $src);
22367 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22368 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22369 ExternalAddress(vector_float_signflip());
22370 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22371 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22372 %}
22373 ins_pipe( pipe_slow );
22374 %}
22375
22376 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22377 predicate(VM_Version::supports_avx10_2() &&
22378 is_integral_type(Matcher::vector_element_basic_type(n)));
22379 match(Set dst (VectorCastD2X src));
22380 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22381 ins_encode %{
22382 int vlen_enc = vector_length_encoding(this, $src);
22383 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22384 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22385 %}
22386 ins_pipe( pipe_slow );
22387 %}
22388
22389 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22390 predicate(VM_Version::supports_avx10_2() &&
22391 is_integral_type(Matcher::vector_element_basic_type(n)));
22392 match(Set dst (VectorCastD2X (LoadVector src)));
22393 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22394 ins_encode %{
22395 int vlen = Matcher::vector_length(this);
22396 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22397 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22398 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22399 %}
22400 ins_pipe( pipe_slow );
22401 %}
22402
22403 instruct vucast(vec dst, vec src) %{
22404 match(Set dst (VectorUCastB2X src));
22405 match(Set dst (VectorUCastS2X src));
22406 match(Set dst (VectorUCastI2X src));
22407 format %{ "vector_ucast $dst,$src\t!" %}
22408 ins_encode %{
22409 assert(UseAVX > 0, "required");
22410
22411 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22412 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22413 int vlen_enc = vector_length_encoding(this);
22414 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22415 %}
22416 ins_pipe( pipe_slow );
22417 %}
22418
22419 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22420 predicate(!VM_Version::supports_avx512vl() &&
22421 Matcher::vector_length_in_bytes(n) < 64 &&
22422 Matcher::vector_element_basic_type(n) == T_INT);
22423 match(Set dst (RoundVF src));
22424 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22425 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22426 ins_encode %{
22427 int vlen_enc = vector_length_encoding(this);
22428 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22429 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22430 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22431 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22432 %}
22433 ins_pipe( pipe_slow );
22434 %}
22435
22436 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22437 predicate((VM_Version::supports_avx512vl() ||
22438 Matcher::vector_length_in_bytes(n) == 64) &&
22439 Matcher::vector_element_basic_type(n) == T_INT);
22440 match(Set dst (RoundVF src));
22441 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22442 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22443 ins_encode %{
22444 int vlen_enc = vector_length_encoding(this);
22445 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22446 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22447 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22448 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22449 %}
22450 ins_pipe( pipe_slow );
22451 %}
22452
22453 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22454 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22455 match(Set dst (RoundVD src));
22456 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22457 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22458 ins_encode %{
22459 int vlen_enc = vector_length_encoding(this);
22460 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22461 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22462 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22463 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22464 %}
22465 ins_pipe( pipe_slow );
22466 %}
22467
22468 // --------------------------------- VectorMaskCmp --------------------------------------
22469
22470 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22471 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22472 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22473 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22474 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22475 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22476 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22477 ins_encode %{
22478 int vlen_enc = vector_length_encoding(this, $src1);
22479 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22480 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22481 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22482 } else {
22483 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22484 }
22485 %}
22486 ins_pipe( pipe_slow );
22487 %}
22488
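// For 512-bit vectors the comparison produces a k-register mask; it is expanded back into
// a vector of all-ones/all-zeroes lanes by a zero-masking load of vector_all_bits_set.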
22489 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22490 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22491 n->bottom_type()->isa_vectmask() == nullptr &&
22492 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22493 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22494 effect(TEMP ktmp);
22495 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22496 ins_encode %{
22497 int vlen_enc = Assembler::AVX_512bit;
22498 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22499 KRegister mask = k0; // The comparison itself is not being masked.
22500 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22501 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22502 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22503 } else {
22504 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22505 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22506 }
22507 %}
22508 ins_pipe( pipe_slow );
22509 %}
22510
22511 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22512 predicate(n->bottom_type()->isa_vectmask() &&
22513 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22514 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22515 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22516 ins_encode %{
22517 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22518 int vlen_enc = vector_length_encoding(this, $src1);
22519 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22520 KRegister mask = k0; // The comparison itself is not being masked.
22521 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22522 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22523 } else {
22524 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22525 }
22526 %}
22527 ins_pipe( pipe_slow );
22528 %}
22529
22530 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22531 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22532 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22533 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22534 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22535 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22536 (n->in(2)->get_int() == BoolTest::eq ||
22537 n->in(2)->get_int() == BoolTest::lt ||
22538 n->in(2)->get_int() == BoolTest::gt)); // cond
22539 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22540 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22541 ins_encode %{
22542 int vlen_enc = vector_length_encoding(this, $src1);
22543 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22544 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22545 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22546 %}
22547 ins_pipe( pipe_slow );
22548 %}
22549
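// ne/le/ge have no direct packed-compare encoding on SSE/AVX; vpcmpCCW emits the
// complementary eq/gt/lt compare and inverts the result, using $xtmp as a temporary.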
22550 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22551 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22552 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22553 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22554 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22555 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22556 (n->in(2)->get_int() == BoolTest::ne ||
22557 n->in(2)->get_int() == BoolTest::le ||
22558 n->in(2)->get_int() == BoolTest::ge)); // cond
22559 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22560 effect(TEMP dst, TEMP xtmp);
22561 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22562 ins_encode %{
22563 int vlen_enc = vector_length_encoding(this, $src1);
22564 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22565 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22566 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22567 %}
22568 ins_pipe( pipe_slow );
22569 %}
22570
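// Unsigned compares are not directly available below AVX-512: flip the sign bit of both
// operands and use the signed compare (x <u y iff (x ^ SIGN_BIT) <s (y ^ SIGN_BIT)).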
22571 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22572 predicate(n->bottom_type()->isa_vectmask() == nullptr &&
22573 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22574 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22575 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22576 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22577 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22578 effect(TEMP dst, TEMP xtmp);
22579 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22580 ins_encode %{
22581 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22582 int vlen_enc = vector_length_encoding(this, $src1);
22583 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22584 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22585
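    // Unsigned compare on signed hardware: XOR-ing both operands with the
    // per-element sign bit (the high_bit_set constant broadcast below) maps the
    // unsigned order onto the signed order, e.g. for bytes 0x00..0xFF becomes
    // -128..127 monotonically, so the signed vpcmp gives the unsigned answer.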
22586 if (vlen_enc == Assembler::AVX_128bit) {
22587 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22588 } else {
22589 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22590 }
22591 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22592 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22593 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22594 %}
22595 ins_pipe( pipe_slow );
22596 %}
22597
22598 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22599 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
22600 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22601 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22602 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22603 effect(TEMP ktmp);
22604 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22605 ins_encode %{
22606 assert(UseAVX > 2, "required");
22607
22608 int vlen_enc = vector_length_encoding(this, $src1);
22609 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22610 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22611 KRegister mask = k0; // The comparison itself is not being masked.
22612 bool merge = false;
22613 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22614
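    // evpcmpd/evpcmpq leave the per-lane predicate in $ktmp; the masked move
    // from vector_all_bits_set() then writes -1 into every lane whose predicate
    // bit is set and, since merge is false (zero-masking), 0 into the rest.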
22615 switch (src1_elem_bt) {
22616 case T_INT: {
22617 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22618 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22619 break;
22620 }
22621 case T_LONG: {
22622 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22623 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22624 break;
22625 }
22626 default: assert(false, "%s", type2name(src1_elem_bt));
22627 }
22628 %}
22629 ins_pipe( pipe_slow );
22630 %}
22631
22632
22633 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22634 predicate(n->bottom_type()->isa_vectmask() &&
22635 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22636 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22638 ins_encode %{
22639 assert(UseAVX > 2, "required");
22640 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
22641
22642 int vlen_enc = vector_length_encoding(this, $src1);
22643 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22644 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22645 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22646
    // The comparison itself is not being masked, so k0 is used.
22648 switch (src1_elem_bt) {
22649 case T_BYTE: {
22650 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22651 break;
22652 }
22653 case T_SHORT: {
22654 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22655 break;
22656 }
22657 case T_INT: {
22658 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22659 break;
22660 }
22661 case T_LONG: {
22662 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22663 break;
22664 }
22665 default: assert(false, "%s", type2name(src1_elem_bt));
22666 }
22667 %}
22668 ins_pipe( pipe_slow );
22669 %}
22670
22671 // Extract
22672
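// Scalar extraction: for vectors of up to 128 bits, get_elem picks the element
// straight out of the source register; for wider vectors, get_lane first copies
// the 128-bit lane holding element $idx into a temporary and get_elem then
// extracts from that lane.
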
22673 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22674 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22675 match(Set dst (ExtractI src idx));
22676 match(Set dst (ExtractS src idx));
22677 match(Set dst (ExtractB src idx));
22678 format %{ "extractI $dst,$src,$idx\t!" %}
22679 ins_encode %{
22680 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22681
22682 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22683 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22684 %}
22685 ins_pipe( pipe_slow );
22686 %}
22687
22688 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22689 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22690 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22691 match(Set dst (ExtractI src idx));
22692 match(Set dst (ExtractS src idx));
22693 match(Set dst (ExtractB src idx));
22694 effect(TEMP vtmp);
22695 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22696 ins_encode %{
22697 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22698
22699 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22700 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22701 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22702 %}
22703 ins_pipe( pipe_slow );
22704 %}
22705
22706 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22707 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22708 match(Set dst (ExtractL src idx));
22709 format %{ "extractL $dst,$src,$idx\t!" %}
22710 ins_encode %{
22711 assert(UseSSE >= 4, "required");
22712 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22713
22714 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22715 %}
22716 ins_pipe( pipe_slow );
22717 %}
22718
22719 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22720 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22721 Matcher::vector_length(n->in(1)) == 8); // src
22722 match(Set dst (ExtractL src idx));
22723 effect(TEMP vtmp);
22724 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22725 ins_encode %{
22726 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22727
22728 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22729 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22730 %}
22731 ins_pipe( pipe_slow );
22732 %}
22733
22734 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22735 predicate(Matcher::vector_length(n->in(1)) <= 4);
22736 match(Set dst (ExtractF src idx));
22737 effect(TEMP dst, TEMP vtmp);
22738 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22739 ins_encode %{
22740 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22741
22742 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22743 %}
22744 ins_pipe( pipe_slow );
22745 %}
22746
22747 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22748 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22749 Matcher::vector_length(n->in(1)/*src*/) == 16);
22750 match(Set dst (ExtractF src idx));
22751 effect(TEMP vtmp);
22752 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22753 ins_encode %{
22754 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22755
22756 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22757 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22758 %}
22759 ins_pipe( pipe_slow );
22760 %}
22761
22762 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22763 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22764 match(Set dst (ExtractD src idx));
22765 format %{ "extractD $dst,$src,$idx\t!" %}
22766 ins_encode %{
22767 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22768
22769 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22770 %}
22771 ins_pipe( pipe_slow );
22772 %}
22773
22774 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22775 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22776 Matcher::vector_length(n->in(1)) == 8); // src
22777 match(Set dst (ExtractD src idx));
22778 effect(TEMP vtmp);
22779 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22780 ins_encode %{
22781 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22782
22783 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22784 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22785 %}
22786 ins_pipe( pipe_slow );
22787 %}
22788
22789 // --------------------------------- Vector Blend --------------------------------------
22790
22791 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22792 predicate(UseAVX == 0);
22793 match(Set dst (VectorBlend (Binary dst src) mask));
22794 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22795 effect(TEMP tmp);
22796 ins_encode %{
22797 assert(UseSSE >= 4, "required");
22798
22799 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22800 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22801 }
22802 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22803 %}
22804 ins_pipe( pipe_slow );
22805 %}
22806
22807 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22808 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22809 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22810 Matcher::vector_length_in_bytes(n) <= 32 &&
22811 is_integral_type(Matcher::vector_element_basic_type(n)));
22812 match(Set dst (VectorBlend (Binary src1 src2) mask));
22813 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22814 ins_encode %{
22815 int vlen_enc = vector_length_encoding(this);
22816 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22817 %}
22818 ins_pipe( pipe_slow );
22819 %}
22820
22821 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22822 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22823 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22824 Matcher::vector_length_in_bytes(n) <= 32 &&
22825 !is_integral_type(Matcher::vector_element_basic_type(n)));
22826 match(Set dst (VectorBlend (Binary src1 src2) mask));
22827 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22828 ins_encode %{
22829 int vlen_enc = vector_length_encoding(this);
22830 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22831 %}
22832 ins_pipe( pipe_slow );
22833 %}
22834
22835 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22836 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22837 n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
22838 Matcher::vector_length_in_bytes(n) <= 32);
22839 match(Set dst (VectorBlend (Binary src1 src2) mask));
22840 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22841 effect(TEMP vtmp, TEMP dst);
22842 ins_encode %{
22843 int vlen_enc = vector_length_encoding(this);
22844 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22845 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22846 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22847 %}
22848 ins_pipe( pipe_slow );
22849 %}
22850
22851 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22852 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22853 n->in(2)->bottom_type()->isa_vectmask() == nullptr);
22854 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22856 effect(TEMP ktmp);
22857 ins_encode %{
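    // AVX-512 has no 512-bit blend driven by a byte-vector mask, so the vector
    // mask is first converted into a k-register predicate by comparing its lanes
    // against all-ones; the blend then merges $src2 over $src1 under that predicate.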
22858 int vlen_enc = Assembler::AVX_512bit;
22859 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22860 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22861 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22862 %}
22863 ins_pipe( pipe_slow );
22864 %}
22865
22866
22867 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22868 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
22869 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22870 VM_Version::supports_avx512bw()));
22871 match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22873 ins_encode %{
22874 int vlen_enc = vector_length_encoding(this);
22875 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22876 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22877 %}
22878 ins_pipe( pipe_slow );
22879 %}
22880
22881 // --------------------------------- ABS --------------------------------------
22882 // a = |a|
22883 instruct vabsB_reg(vec dst, vec src) %{
22884 match(Set dst (AbsVB src));
22885 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22886 ins_encode %{
22887 uint vlen = Matcher::vector_length(this);
22888 if (vlen <= 16) {
22889 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22890 } else {
22891 int vlen_enc = vector_length_encoding(this);
22892 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22893 }
22894 %}
22895 ins_pipe( pipe_slow );
22896 %}
22897
22898 instruct vabsS_reg(vec dst, vec src) %{
22899 match(Set dst (AbsVS src));
22900 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22901 ins_encode %{
22902 uint vlen = Matcher::vector_length(this);
22903 if (vlen <= 8) {
22904 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22905 } else {
22906 int vlen_enc = vector_length_encoding(this);
22907 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22908 }
22909 %}
22910 ins_pipe( pipe_slow );
22911 %}
22912
22913 instruct vabsI_reg(vec dst, vec src) %{
22914 match(Set dst (AbsVI src));
22915 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22916 ins_encode %{
22917 uint vlen = Matcher::vector_length(this);
22918 if (vlen <= 4) {
22919 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22920 } else {
22921 int vlen_enc = vector_length_encoding(this);
22922 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22923 }
22924 %}
22925 ins_pipe( pipe_slow );
22926 %}
22927
22928 instruct vabsL_reg(vec dst, vec src) %{
22929 match(Set dst (AbsVL src));
22930 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22931 ins_encode %{
22932 assert(UseAVX > 2, "required");
22933 int vlen_enc = vector_length_encoding(this);
22934 if (!VM_Version::supports_avx512vl()) {
22935 vlen_enc = Assembler::AVX_512bit;
22936 }
22937 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22938 %}
22939 ins_pipe( pipe_slow );
22940 %}
22941
22942 // --------------------------------- ABSNEG --------------------------------------
22943
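// Abs and Neg are both sign-bit manipulations: abs clears the sign bit of every
// lane (AND with e.g. 0x7fffffff for floats) and neg flips it (XOR with the
// corresponding sign-flip constant); the vabsnegf/vabsnegd helpers pick the
// constant from the ideal opcode passed in.
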
22944 instruct vabsnegF(vec dst, vec src) %{
22945 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22946 match(Set dst (AbsVF src));
22947 match(Set dst (NegVF src));
22948 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22949 ins_cost(150);
22950 ins_encode %{
22951 int opcode = this->ideal_Opcode();
22952 int vlen = Matcher::vector_length(this);
22953 if (vlen == 2) {
22954 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22955 } else {
22956 assert(vlen == 8 || vlen == 16, "required");
22957 int vlen_enc = vector_length_encoding(this);
22958 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22959 }
22960 %}
22961 ins_pipe( pipe_slow );
22962 %}
22963
22964 instruct vabsneg4F(vec dst) %{
22965 predicate(Matcher::vector_length(n) == 4);
22966 match(Set dst (AbsVF dst));
22967 match(Set dst (NegVF dst));
22968 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22969 ins_cost(150);
22970 ins_encode %{
22971 int opcode = this->ideal_Opcode();
22972 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22973 %}
22974 ins_pipe( pipe_slow );
22975 %}
22976
22977 instruct vabsnegD(vec dst, vec src) %{
22978 match(Set dst (AbsVD src));
22979 match(Set dst (NegVD src));
22980 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22981 ins_encode %{
22982 int opcode = this->ideal_Opcode();
22983 uint vlen = Matcher::vector_length(this);
22984 if (vlen == 2) {
22985 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22986 } else {
22987 int vlen_enc = vector_length_encoding(this);
22988 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22989 }
22990 %}
22991 ins_pipe( pipe_slow );
22992 %}
22993
22994 //------------------------------------- VectorTest --------------------------------------------
22995
22996 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22997 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22998 match(Set cr (VectorTest src1 src2));
22999 effect(TEMP vtmp);
23000 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
23001 ins_encode %{
23002 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23003 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23004 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23005 %}
23006 ins_pipe( pipe_slow );
23007 %}
23008
23009 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23010 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23011 match(Set cr (VectorTest src1 src2));
23012 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23013 ins_encode %{
23014 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23015 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23016 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23017 %}
23018 ins_pipe( pipe_slow );
23019 %}
23020
23021 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23022 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23023 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23024 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23025 match(Set cr (VectorTest src1 src2));
23026 effect(TEMP tmp);
23027 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23028 ins_encode %{
23029 uint masklen = Matcher::vector_length(this, $src1);
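    // All-true: keep only the masklen live bits and compare against the all-ones
    // pattern, e.g. masklen == 4 checks (k & 0xF) == 0xF.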
23030 __ kmovwl($tmp$$Register, $src1$$KRegister);
23031 __ andl($tmp$$Register, (1 << masklen) - 1);
23032 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23033 %}
23034 ins_pipe( pipe_slow );
23035 %}
23036
23037 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23038 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23039 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23040 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23041 match(Set cr (VectorTest src1 src2));
23042 effect(TEMP tmp);
23043 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23044 ins_encode %{
23045 uint masklen = Matcher::vector_length(this, $src1);
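    // Any-true only needs the AND: the zero flag stays clear iff at least one of
    // the masklen live bits is set.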
23046 __ kmovwl($tmp$$Register, $src1$$KRegister);
23047 __ andl($tmp$$Register, (1 << masklen) - 1);
23048 %}
23049 ins_pipe( pipe_slow );
23050 %}
23051
23052 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23053 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23054 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23055 match(Set cr (VectorTest src1 src2));
23056 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23057 ins_encode %{
23058 uint masklen = Matcher::vector_length(this, $src1);
23059 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23060 %}
23061 ins_pipe( pipe_slow );
23062 %}
23063
23064 //------------------------------------- LoadMask --------------------------------------------
23065
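// VectorLoadMask widens a boolean vector (one byte per lane holding 0 or 1) into
// the internal mask form: full-width 0/-1 lanes, or a k-register when the result
// type is TypeVectMask.
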
23066 instruct loadMask(legVec dst, legVec src) %{
23067 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23068 match(Set dst (VectorLoadMask src));
23069 effect(TEMP dst);
23070 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23071 ins_encode %{
23072 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23073 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23074 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23075 %}
23076 ins_pipe( pipe_slow );
23077 %}
23078
23079 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23080 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23081 match(Set dst (VectorLoadMask src));
23082 effect(TEMP xtmp);
23083 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23084 ins_encode %{
23085 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23086 true, Assembler::AVX_512bit);
23087 %}
23088 ins_pipe( pipe_slow );
23089 %}
23090
23091 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23092 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23093 match(Set dst (VectorLoadMask src));
23094 effect(TEMP xtmp);
23095 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23096 ins_encode %{
23097 int vlen_enc = vector_length_encoding(in(1));
23098 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23099 false, vlen_enc);
23100 %}
23101 ins_pipe( pipe_slow );
23102 %}
23103
23104 //------------------------------------- StoreMask --------------------------------------------
23105
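// VectorStoreMask is the inverse of VectorLoadMask: it narrows a lane mask back
// to one byte per lane holding 0 or 1. Mask lanes are 0 or -1, so a byte-wise
// absolute value (pabsb/vpabsb) canonicalizes -1 to 1 after the narrowing.
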
23106 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23107 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23108 match(Set dst (VectorStoreMask src size));
23109 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23110 ins_encode %{
23111 int vlen = Matcher::vector_length(this);
23112 if (vlen <= 16 && UseAVX <= 2) {
23113 assert(UseSSE >= 3, "required");
23114 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23115 } else {
23116 assert(UseAVX > 0, "required");
23117 int src_vlen_enc = vector_length_encoding(this, $src);
23118 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23119 }
23120 %}
23121 ins_pipe( pipe_slow );
23122 %}
23123
23124 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23125 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23126 match(Set dst (VectorStoreMask src size));
23127 effect(TEMP_DEF dst, TEMP xtmp);
23128 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23129 ins_encode %{
23130 int vlen_enc = Assembler::AVX_128bit;
23131 int vlen = Matcher::vector_length(this);
23132 if (vlen <= 8) {
23133 assert(UseSSE >= 3, "required");
23134 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23135 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23136 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23137 } else {
23138 assert(UseAVX > 0, "required");
23139 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23140 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23141 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23142 }
23143 %}
23144 ins_pipe( pipe_slow );
23145 %}
23146
23147 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23148 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23149 match(Set dst (VectorStoreMask src size));
23150 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23151 effect(TEMP_DEF dst, TEMP xtmp);
23152 ins_encode %{
23153 int vlen_enc = Assembler::AVX_128bit;
23154 int vlen = Matcher::vector_length(this);
23155 if (vlen <= 4) {
23156 assert(UseSSE >= 3, "required");
23157 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23158 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23159 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23160 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23161 } else {
23162 assert(UseAVX > 0, "required");
23163 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23164 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23165 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23166 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23167 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23168 }
23169 %}
23170 ins_pipe( pipe_slow );
23171 %}
23172
23173 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23174 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23175 match(Set dst (VectorStoreMask src size));
23176 effect(TEMP_DEF dst, TEMP xtmp);
23177 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23178 ins_encode %{
23179 assert(UseSSE >= 3, "required");
23180 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23181 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23182 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23183 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23184 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23185 %}
23186 ins_pipe( pipe_slow );
23187 %}
23188
23189 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23190 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23191 match(Set dst (VectorStoreMask src size));
23192 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23193 effect(TEMP_DEF dst, TEMP vtmp);
23194 ins_encode %{
23195 int vlen_enc = Assembler::AVX_128bit;
23196 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23197 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23198 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23199 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23200 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23201 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23202 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23203 %}
23204 ins_pipe( pipe_slow );
23205 %}
23206
23207 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23208 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23209 match(Set dst (VectorStoreMask src size));
23210 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23211 ins_encode %{
23212 int src_vlen_enc = vector_length_encoding(this, $src);
23213 int dst_vlen_enc = vector_length_encoding(this);
23214 if (!VM_Version::supports_avx512vl()) {
23215 src_vlen_enc = Assembler::AVX_512bit;
23216 }
23217 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23218 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23219 %}
23220 ins_pipe( pipe_slow );
23221 %}
23222
23223 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23224 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
23225 match(Set dst (VectorStoreMask src size));
23226 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23227 ins_encode %{
23228 int src_vlen_enc = vector_length_encoding(this, $src);
23229 int dst_vlen_enc = vector_length_encoding(this);
23230 if (!VM_Version::supports_avx512vl()) {
23231 src_vlen_enc = Assembler::AVX_512bit;
23232 }
23233 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23234 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23235 %}
23236 ins_pipe( pipe_slow );
23237 %}
23238
23239 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23240 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
23241 match(Set dst (VectorStoreMask mask size));
23242 effect(TEMP_DEF dst);
23243 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23244 ins_encode %{
23245 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23246 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23247 false, Assembler::AVX_512bit, noreg);
23248 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23249 %}
23250 ins_pipe( pipe_slow );
23251 %}
23252
23253 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23254 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
23255 match(Set dst (VectorStoreMask mask size));
23256 effect(TEMP_DEF dst);
23257 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23258 ins_encode %{
23259 int dst_vlen_enc = vector_length_encoding(this);
23260 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23261 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23262 %}
23263 ins_pipe( pipe_slow );
23264 %}
23265
23266 instruct vmaskcast_evex(kReg dst) %{
23267 match(Set dst (VectorMaskCast dst));
23268 ins_cost(0);
23269 format %{ "vector_mask_cast $dst" %}
23270 ins_encode %{
23271 // empty
23272 %}
23273 ins_pipe(empty);
23274 %}
23275
23276 instruct vmaskcast(vec dst) %{
23277 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23278 match(Set dst (VectorMaskCast dst));
23279 ins_cost(0);
23280 format %{ "vector_mask_cast $dst" %}
23281 ins_encode %{
23282 // empty
23283 %}
23284 ins_pipe(empty);
23285 %}
23286
23287 instruct vmaskcast_avx(vec dst, vec src) %{
23288 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23289 match(Set dst (VectorMaskCast src));
23290 format %{ "vector_mask_cast $dst, $src" %}
23291 ins_encode %{
23292 int vlen = Matcher::vector_length(this);
23293 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23294 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23295 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23296 %}
23297 ins_pipe(pipe_slow);
23298 %}
23299
23300 //-------------------------------- Load Iota Indices ----------------------------------
23301
23302 instruct loadIotaIndices(vec dst, immI_0 src) %{
23303 match(Set dst (VectorLoadConst src));
23304 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23305 ins_encode %{
23306 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23307 BasicType bt = Matcher::vector_element_basic_type(this);
23308 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23309 %}
23310 ins_pipe( pipe_slow );
23311 %}
23312
23313 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23314 match(Set dst (PopulateIndex src1 src2));
23315 effect(TEMP dst, TEMP vtmp);
23316 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23317 ins_encode %{
23318 assert($src2$$constant == 1, "required");
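    // dst[i] = $src1 + i: broadcast the scalar start value, load the iota
    // constant {0, 1, 2, ...} and add, e.g. a start value of 5 yields {5, 6, 7, ...}.
    // The assert above restricts the stride ($src2) to 1.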
23319 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23320 int vlen_enc = vector_length_encoding(this);
23321 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23322 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23323 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23324 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23325 %}
23326 ins_pipe( pipe_slow );
23327 %}
23328
23329 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23330 match(Set dst (PopulateIndex src1 src2));
23331 effect(TEMP dst, TEMP vtmp);
23332 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23333 ins_encode %{
23334 assert($src2$$constant == 1, "required");
23335 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23336 int vlen_enc = vector_length_encoding(this);
23337 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23338 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23339 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23340 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23341 %}
23342 ins_pipe( pipe_slow );
23343 %}
23344
23345 //-------------------------------- Rearrange ----------------------------------
23346
23347 // LoadShuffle/Rearrange for Byte
23348 instruct rearrangeB(vec dst, vec shuffle) %{
23349 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23350 Matcher::vector_length(n) < 32);
23351 match(Set dst (VectorRearrange dst shuffle));
23352 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23353 ins_encode %{
23354 assert(UseSSE >= 4, "required");
23355 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23356 %}
23357 ins_pipe( pipe_slow );
23358 %}
23359
23360 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23361 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23362 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23363 match(Set dst (VectorRearrange src shuffle));
23364 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23365 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23366 ins_encode %{
23367 assert(UseAVX >= 2, "required");
23368 // Swap src into vtmp1
23369 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23370 // Shuffle swapped src to get entries from other 128 bit lane
23371 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
23373 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23374 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23375 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23376 // Perform the blend
23377 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23378 %}
23379 ins_pipe( pipe_slow );
23380 %}
23381
23382
23383 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23384 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23385 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23386 match(Set dst (VectorRearrange src shuffle));
23387 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
23388 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23389 ins_encode %{
23390 int vlen_enc = vector_length_encoding(this);
23391 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23392 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23393 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23394 %}
23395 ins_pipe( pipe_slow );
23396 %}
23397
23398 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23399 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23400 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23401 match(Set dst (VectorRearrange src shuffle));
23402 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23403 ins_encode %{
23404 int vlen_enc = vector_length_encoding(this);
23405 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23406 %}
23407 ins_pipe( pipe_slow );
23408 %}
23409
23410 // LoadShuffle/Rearrange for Short
23411
23412 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23413 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23414 !VM_Version::supports_avx512bw());
23415 match(Set dst (VectorLoadShuffle src));
23416 effect(TEMP dst, TEMP vtmp);
23417 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23418 ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
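    // e.g. a short shuffle index of 3 expands to the byte-index pair (6, 7).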
23421 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23422 if (UseAVX == 0) {
23423 assert(vlen_in_bytes <= 16, "required");
23424 // Multiply each shuffle by two to get byte index
23425 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23426 __ psllw($vtmp$$XMMRegister, 1);
23427
23428 // Duplicate to create 2 copies of byte index
23429 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23430 __ psllw($dst$$XMMRegister, 8);
23431 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23432
23433 // Add one to get alternate byte index
23434 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23435 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23436 } else {
23437 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23438 int vlen_enc = vector_length_encoding(this);
23439 // Multiply each shuffle by two to get byte index
23440 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23441
23442 // Duplicate to create 2 copies of byte index
23443 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23444 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23445
23446 // Add one to get alternate byte index
23447 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23448 }
23449 %}
23450 ins_pipe( pipe_slow );
23451 %}
23452
23453 instruct rearrangeS(vec dst, vec shuffle) %{
23454 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23455 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23456 match(Set dst (VectorRearrange dst shuffle));
23457 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23458 ins_encode %{
23459 assert(UseSSE >= 4, "required");
23460 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23461 %}
23462 ins_pipe( pipe_slow );
23463 %}
23464
23465 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23466 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23467 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23468 match(Set dst (VectorRearrange src shuffle));
23469 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23470 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23471 ins_encode %{
23472 assert(UseAVX >= 2, "required");
23473 // Swap src into vtmp1
23474 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23475 // Shuffle swapped src to get entries from other 128 bit lane
23476 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
23478 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23479 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23480 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23481 // Perform the blend
23482 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23483 %}
23484 ins_pipe( pipe_slow );
23485 %}
23486
23487 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23488 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23489 VM_Version::supports_avx512bw());
23490 match(Set dst (VectorRearrange src shuffle));
23491 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23492 ins_encode %{
23493 int vlen_enc = vector_length_encoding(this);
23494 if (!VM_Version::supports_avx512vl()) {
23495 vlen_enc = Assembler::AVX_512bit;
23496 }
23497 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23498 %}
23499 ins_pipe( pipe_slow );
23500 %}
23501
23502 // LoadShuffle/Rearrange for Integer and Float
23503
23504 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23505 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23506 Matcher::vector_length(n) == 4 && UseAVX == 0);
23507 match(Set dst (VectorLoadShuffle src));
23508 effect(TEMP dst, TEMP vtmp);
23509 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23510 ins_encode %{
23511 assert(UseSSE >= 4, "required");
23512
    // Create a byte shuffle mask from the int shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
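    // e.g. an int shuffle index of 2 expands to the byte indices 8, 9, 10 and 11.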
23515
23516 // Duplicate and multiply each shuffle by 4
23517 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23518 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23519 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23520 __ psllw($vtmp$$XMMRegister, 2);
23521
23522 // Duplicate again to create 4 copies of byte index
23523 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23524 __ psllw($dst$$XMMRegister, 8);
23525 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23526
23527 // Add 3,2,1,0 to get alternate byte index
23528 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23529 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23530 %}
23531 ins_pipe( pipe_slow );
23532 %}
23533
23534 instruct rearrangeI(vec dst, vec shuffle) %{
23535 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23536 UseAVX == 0);
23537 match(Set dst (VectorRearrange dst shuffle));
23538 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23539 ins_encode %{
23540 assert(UseSSE >= 4, "required");
23541 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23542 %}
23543 ins_pipe( pipe_slow );
23544 %}
23545
23546 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23547 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23548 UseAVX > 0);
23549 match(Set dst (VectorRearrange src shuffle));
23550 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23551 ins_encode %{
23552 int vlen_enc = vector_length_encoding(this);
23553 BasicType bt = Matcher::vector_element_basic_type(this);
23554 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23555 %}
23556 ins_pipe( pipe_slow );
23557 %}
23558
23559 // LoadShuffle/Rearrange for Long and Double
23560
23561 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23562 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23563 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23564 match(Set dst (VectorLoadShuffle src));
23565 effect(TEMP dst, TEMP vtmp);
23566 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23567 ins_encode %{
23568 assert(UseAVX >= 2, "required");
23569
23570 int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask, since only a
    // double word shuffle instruction is available on these platforms.
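    // e.g. a long shuffle index of 3 becomes the double-word index pair (6, 7).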
23573
23574 // Multiply each shuffle by two to get double word index
23575 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23576
23577 // Duplicate each double word shuffle
23578 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23579 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23580
23581 // Add one to get alternate double word index
23582 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23583 %}
23584 ins_pipe( pipe_slow );
23585 %}
23586
23587 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23588 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23589 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23590 match(Set dst (VectorRearrange src shuffle));
23591 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23592 ins_encode %{
23593 assert(UseAVX >= 2, "required");
23594
23595 int vlen_enc = vector_length_encoding(this);
23596 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23597 %}
23598 ins_pipe( pipe_slow );
23599 %}
23600
23601 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23602 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23603 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23604 match(Set dst (VectorRearrange src shuffle));
23605 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23606 ins_encode %{
23607 assert(UseAVX > 2, "required");
23608
23609 int vlen_enc = vector_length_encoding(this);
23610 if (vlen_enc == Assembler::AVX_128bit) {
23611 vlen_enc = Assembler::AVX_256bit;
23612 }
23613 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23614 %}
23615 ins_pipe( pipe_slow );
23616 %}
23617
23618 // --------------------------------- FMA --------------------------------------
23619 // a * b + c
23620
23621 instruct vfmaF_reg(vec a, vec b, vec c) %{
23622 match(Set c (FmaVF c (Binary a b)));
23623 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23624 ins_cost(150);
23625 ins_encode %{
23626 assert(UseFMA, "not enabled");
23627 int vlen_enc = vector_length_encoding(this);
23628 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23629 %}
23630 ins_pipe( pipe_slow );
23631 %}
23632
23633 instruct vfmaF_mem(vec a, memory b, vec c) %{
23634 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23635 match(Set c (FmaVF c (Binary a (LoadVector b))));
23636 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23637 ins_cost(150);
23638 ins_encode %{
23639 assert(UseFMA, "not enabled");
23640 int vlen_enc = vector_length_encoding(this);
23641 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23642 %}
23643 ins_pipe( pipe_slow );
23644 %}
23645
23646 instruct vfmaD_reg(vec a, vec b, vec c) %{
23647 match(Set c (FmaVD c (Binary a b)));
23648 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23649 ins_cost(150);
23650 ins_encode %{
23651 assert(UseFMA, "not enabled");
23652 int vlen_enc = vector_length_encoding(this);
23653 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23654 %}
23655 ins_pipe( pipe_slow );
23656 %}
23657
23658 instruct vfmaD_mem(vec a, memory b, vec c) %{
23659 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23660 match(Set c (FmaVD c (Binary a (LoadVector b))));
23661 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23662 ins_cost(150);
23663 ins_encode %{
23664 assert(UseFMA, "not enabled");
23665 int vlen_enc = vector_length_encoding(this);
23666 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23667 %}
23668 ins_pipe( pipe_slow );
23669 %}
23670
23671 // --------------------------------- Vector Multiply Add --------------------------------------
23672
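// MulAddVS2VI maps onto pmaddwd/vpmaddwd: each 32-bit result lane is the sum of
// two adjacent signed 16x16-bit products, dst[i] = s1[2i]*s2[2i] + s1[2i+1]*s2[2i+1].
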
23673 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23674 predicate(UseAVX == 0);
23675 match(Set dst (MulAddVS2VI dst src1));
23676 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23677 ins_encode %{
23678 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23679 %}
23680 ins_pipe( pipe_slow );
23681 %}
23682
23683 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23684 predicate(UseAVX > 0);
23685 match(Set dst (MulAddVS2VI src1 src2));
23686 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23687 ins_encode %{
23688 int vlen_enc = vector_length_encoding(this);
23689 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23690 %}
23691 ins_pipe( pipe_slow );
23692 %}
23693
23694 // --------------------------------- Vector Multiply Add Add ----------------------------------
23695
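// With AVX-512 VNNI the accumulate is folded into the same instruction:
// evpdpwssd computes dst[i] += s1[2i]*s2[2i] + s1[2i+1]*s2[2i+1].
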
23696 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23697 predicate(VM_Version::supports_avx512_vnni());
23698 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23699 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23700 ins_encode %{
23701 assert(UseAVX > 2, "required");
23702 int vlen_enc = vector_length_encoding(this);
23703 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23704 %}
23705 ins_pipe( pipe_slow );
23706 ins_cost(10);
23707 %}
23708
23709 // --------------------------------- PopCount --------------------------------------
23710
23711 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23712 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23713 match(Set dst (PopCountVI src));
23714 match(Set dst (PopCountVL src));
23715 format %{ "vector_popcount_integral $dst, $src" %}
23716 ins_encode %{
23717 int opcode = this->ideal_Opcode();
23718 int vlen_enc = vector_length_encoding(this, $src);
23719 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23720 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23721 %}
23722 ins_pipe( pipe_slow );
23723 %}
23724
23725 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23726 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23727 match(Set dst (PopCountVI src mask));
23728 match(Set dst (PopCountVL src mask));
23729 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23730 ins_encode %{
23731 int vlen_enc = vector_length_encoding(this, $src);
23732 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23733 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23734 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23735 %}
23736 ins_pipe( pipe_slow );
23737 %}
23738
23739 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23740 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23741 match(Set dst (PopCountVI src));
23742 match(Set dst (PopCountVL src));
23743 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23744 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23745 ins_encode %{
23746 int opcode = this->ideal_Opcode();
23747 int vlen_enc = vector_length_encoding(this, $src);
23748 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23749 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23750 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23751 %}
23752 ins_pipe( pipe_slow );
23753 %}
23754
23755 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23756
23757 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23758 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23759 Matcher::vector_length_in_bytes(n->in(1))));
23760 match(Set dst (CountTrailingZerosV src));
23761 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23762 ins_cost(400);
23763 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
23764 ins_encode %{
23765 int vlen_enc = vector_length_encoding(this, $src);
23766 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23767 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23768 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23769 %}
23770 ins_pipe( pipe_slow );
23771 %}
23772
23773 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23774 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23775 VM_Version::supports_avx512cd() &&
23776 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23777 match(Set dst (CountTrailingZerosV src));
23778 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23779 ins_cost(400);
23780 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23781 ins_encode %{
23782 int vlen_enc = vector_length_encoding(this, $src);
23783 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23784 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23785 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23786 %}
23787 ins_pipe( pipe_slow );
23788 %}
23789
23790 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23791 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23792 match(Set dst (CountTrailingZerosV src));
23793 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23794 ins_cost(400);
23795 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23796 ins_encode %{
23797 int vlen_enc = vector_length_encoding(this, $src);
23798 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23799 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23800 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23801 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23802 %}
23803 ins_pipe( pipe_slow );
23804 %}
23805
23806 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23807 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23808 match(Set dst (CountTrailingZerosV src));
23809 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23810 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23811 ins_encode %{
23812 int vlen_enc = vector_length_encoding(this, $src);
23813 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23814 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23815 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23816 %}
23817 ins_pipe( pipe_slow );
23818 %}
23819
23820
23821 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23822
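// The 8-bit $func immediate is a three-input truth table over (dst, src2, src3):
// bit i of the immediate is the result produced when the three input bits, read
// as dst:src2:src3, equal i. For example 0x96 encodes a three-way XOR and 0xE8
// encodes the majority function.
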
23823 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23824 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23825 effect(TEMP dst);
23826 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23827 ins_encode %{
23828 int vector_len = vector_length_encoding(this);
23829 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23830 %}
23831 ins_pipe( pipe_slow );
23832 %}
23833
23834 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23835 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23836 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23837 effect(TEMP dst);
23838 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23839 ins_encode %{
23840 int vector_len = vector_length_encoding(this);
23841 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23842 %}
23843 ins_pipe( pipe_slow );
23844 %}
23845
23846 // --------------------------------- Rotation Operations ----------------------------------
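// Two forms are provided: a compile-time immediate rotate count and a per-lane
// variable count. Both defer to the macro assembler, which picks the rotate
// direction from the ideal opcode (RotateLeftV vs. RotateRightV) and the
// element type.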
23847 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23848 match(Set dst (RotateLeftV src shift));
23849 match(Set dst (RotateRightV src shift));
23850 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23851 ins_encode %{
23852 int opcode = this->ideal_Opcode();
23853 int vector_len = vector_length_encoding(this);
23854 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23855 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23856 %}
23857 ins_pipe( pipe_slow );
23858 %}
23859
23860 instruct vprorate(vec dst, vec src, vec shift) %{
23861 match(Set dst (RotateLeftV src shift));
23862 match(Set dst (RotateRightV src shift));
23863 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23864 ins_encode %{
23865 int opcode = this->ideal_Opcode();
23866 int vector_len = vector_length_encoding(this);
23867 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23868 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23869 %}
23870 ins_pipe( pipe_slow );
23871 %}
23872
23873 // ---------------------------------- Masked Operations ------------------------------------
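// Two masking styles appear below: on AVX the mask is an ordinary vector whose
// lanes act as element masks (vmaskmov-style loads/stores, non-subword
// elements only), while on AVX-512 the mask lives in a k-register and the
// EVEX-encoded instruction applies it directly. The isa_vectmask() predicates
// select between the two forms.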
23874 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23875 predicate(!n->in(3)->bottom_type()->isa_vectmask());
23876 match(Set dst (LoadVectorMasked mem mask));
23877 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23878 ins_encode %{
23879 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23880 int vlen_enc = vector_length_encoding(this);
23881 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23882 %}
23883 ins_pipe( pipe_slow );
23884 %}
23885
23886
23887 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23888 predicate(n->in(3)->bottom_type()->isa_vectmask());
23889 match(Set dst (LoadVectorMasked mem mask));
23890 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23891 ins_encode %{
23892 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23893 int vector_len = vector_length_encoding(this);
23894 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23895 %}
23896 ins_pipe( pipe_slow );
23897 %}
23898
23899 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23900 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
23901 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23902 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23903 ins_encode %{
23904 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23905 int vlen_enc = vector_length_encoding(src_node);
23906 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23907 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23908 %}
23909 ins_pipe( pipe_slow );
23910 %}
23911
23912 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23913 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
23914 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23915 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23916 ins_encode %{
23917 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23918 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23919 int vlen_enc = vector_length_encoding(src_node);
23920 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23921 %}
23922 ins_pipe( pipe_slow );
23923 %}
23924
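// Debugging aid: traps if any of the address bits selected by $mask are set,
// i.e. if a vector memory access is not aligned to the expected boundary.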
23925 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23926 match(Set addr (VerifyVectorAlignment addr mask));
23927 effect(KILL cr);
23928 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23929 ins_encode %{
23930 Label Lskip;
23931 // check if masked bits of addr are zero
23932 __ testq($addr$$Register, $mask$$constant);
23933 __ jccb(Assembler::equal, Lskip);
23934 __ stop("verify_vector_alignment found a misaligned vector memory access");
23935 __ bind(Lskip);
23936 %}
23937 ins_pipe(pipe_slow);
23938 %}
23939
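// VectorCmpMasked compares the lanes of src1 and src2 selected by $mask and
// leaves -1 in $dst when every selected lane is equal; otherwise $dst receives
// the index of the first lane that fails the comparison (assuming a
// prefix-style mask such as the one produced by VectorMaskGen).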
23940 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23941 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23942 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23943 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23944 ins_encode %{
23945 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23946 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23947
23948 Label DONE;
23949 int vlen_enc = vector_length_encoding(this, $src1);
23950 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23951
23952 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23953 __ mov64($dst$$Register, -1L);
23954 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
23955 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23956 __ jccb(Assembler::carrySet, DONE);
23957 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23958 __ notq($dst$$Register);
23959 __ tzcntq($dst$$Register, $dst$$Register);
23960 __ bind(DONE);
23961 %}
23962 ins_pipe( pipe_slow );
23963 %}
23964
23965
23966 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23967 match(Set dst (VectorMaskGen len));
23968 effect(TEMP temp, KILL cr);
23969 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23970 ins_encode %{
23971 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23972 %}
23973 ins_pipe( pipe_slow );
23974 %}
23975
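// For a constant length the mask is materialized directly as a run of $len
// low-order set bits, ~0UL >> (64 - len), and then moved into the k-register;
// e.g. len == 5 yields 0x1f.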
23976 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23977 match(Set dst (VectorMaskGen len));
23978 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23979 effect(TEMP temp);
23980 ins_encode %{
23981 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
23982 __ kmovql($dst$$KRegister, $temp$$Register);
23983 %}
23984 ins_pipe( pipe_slow );
23985 %}
23986
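// VectorMaskToLong / TrueCount / First-LastTrue: the *_evex rules read the
// mask straight out of a k-register, while the *_bool and *_avx rules first
// collapse a boolean vector (or a VectorStoreMask result) via
// vector_mask_operation, using $xtmp as scratch.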
23987 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23988 predicate(n->in(1)->bottom_type()->isa_vectmask());
23989 match(Set dst (VectorMaskToLong mask));
23990 effect(TEMP dst, KILL cr);
23991 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23992 ins_encode %{
23993 int opcode = this->ideal_Opcode();
23994 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23995 int mask_len = Matcher::vector_length(this, $mask);
23996 int mask_size = mask_len * type2aelembytes(mbt);
23997 int vlen_enc = vector_length_encoding(this, $mask);
23998 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23999 $dst$$Register, mask_len, mask_size, vlen_enc);
24000 %}
24001 ins_pipe( pipe_slow );
24002 %}
24003
24004 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24005 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24006 match(Set dst (VectorMaskToLong mask));
24007 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24008 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24009 ins_encode %{
24010 int opcode = this->ideal_Opcode();
24011 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24012 int mask_len = Matcher::vector_length(this, $mask);
24013 int vlen_enc = vector_length_encoding(this, $mask);
24014 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24015 $dst$$Register, mask_len, mbt, vlen_enc);
24016 %}
24017 ins_pipe( pipe_slow );
24018 %}
24019
24020 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24021 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24022 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24023 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24024 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24025 ins_encode %{
24026 int opcode = this->ideal_Opcode();
24027 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24028 int mask_len = Matcher::vector_length(this, $mask);
24029 int vlen_enc = vector_length_encoding(this, $mask);
24030 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24031 $dst$$Register, mask_len, mbt, vlen_enc);
24032 %}
24033 ins_pipe( pipe_slow );
24034 %}
24035
24036 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24037 predicate(n->in(1)->bottom_type()->isa_vectmask());
24038 match(Set dst (VectorMaskTrueCount mask));
24039 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24040 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24041 ins_encode %{
24042 int opcode = this->ideal_Opcode();
24043 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24044 int mask_len = Matcher::vector_length(this, $mask);
24045 int mask_size = mask_len * type2aelembytes(mbt);
24046 int vlen_enc = vector_length_encoding(this, $mask);
24047 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24048 $tmp$$Register, mask_len, mask_size, vlen_enc);
24049 %}
24050 ins_pipe( pipe_slow );
24051 %}
24052
24053 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24054 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24055 match(Set dst (VectorMaskTrueCount mask));
24056 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24057 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24058 ins_encode %{
24059 int opcode = this->ideal_Opcode();
24060 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24061 int mask_len = Matcher::vector_length(this, $mask);
24062 int vlen_enc = vector_length_encoding(this, $mask);
24063 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24064 $tmp$$Register, mask_len, mbt, vlen_enc);
24065 %}
24066 ins_pipe( pipe_slow );
24067 %}
24068
24069 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24070 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24071 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24072 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24073 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24074 ins_encode %{
24075 int opcode = this->ideal_Opcode();
24076 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24077 int mask_len = Matcher::vector_length(this, $mask);
24078 int vlen_enc = vector_length_encoding(this, $mask);
24079 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24080 $tmp$$Register, mask_len, mbt, vlen_enc);
24081 %}
24082 ins_pipe( pipe_slow );
24083 %}
24084
24085 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24086 predicate(n->in(1)->bottom_type()->isa_vectmask());
24087 match(Set dst (VectorMaskFirstTrue mask));
24088 match(Set dst (VectorMaskLastTrue mask));
24089 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24090 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24091 ins_encode %{
24092 int opcode = this->ideal_Opcode();
24093 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24094 int mask_len = Matcher::vector_length(this, $mask);
24095 int mask_size = mask_len * type2aelembytes(mbt);
24096 int vlen_enc = vector_length_encoding(this, $mask);
24097 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24098 $tmp$$Register, mask_len, mask_size, vlen_enc);
24099 %}
24100 ins_pipe( pipe_slow );
24101 %}
24102
24103 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24104 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
24105 match(Set dst (VectorMaskFirstTrue mask));
24106 match(Set dst (VectorMaskLastTrue mask));
24107 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24108 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24109 ins_encode %{
24110 int opcode = this->ideal_Opcode();
24111 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24112 int mask_len = Matcher::vector_length(this, $mask);
24113 int vlen_enc = vector_length_encoding(this, $mask);
24114 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24115 $tmp$$Register, mask_len, mbt, vlen_enc);
24116 %}
24117 ins_pipe( pipe_slow );
24118 %}
24119
24120 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24121 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
24122 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24123 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24124 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24125 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24126 ins_encode %{
24127 int opcode = this->ideal_Opcode();
24128 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24129 int mask_len = Matcher::vector_length(this, $mask);
24130 int vlen_enc = vector_length_encoding(this, $mask);
24131 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24132 $tmp$$Register, mask_len, mbt, vlen_enc);
24133 %}
24134 ins_pipe( pipe_slow );
24135 %}
24136
24137 // --------------------------------- Compress/Expand Operations ---------------------------
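// CompressV packs the lanes selected by the mask into the low end of the
// destination; ExpandV performs the inverse scatter. With AVX-512VL (or a full
// 512-bit vector) this maps onto the vpcompress/vpexpand family; the AVX2
// fallback below builds a lane permutation from the mask (hence the $perm
// temporary) and shuffles instead.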
24138 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24139 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24140 match(Set dst (CompressV src mask));
24141 match(Set dst (ExpandV src mask));
24142 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24143 format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24144 ins_encode %{
24145 int opcode = this->ideal_Opcode();
24146 int vlen_enc = vector_length_encoding(this);
24147 BasicType bt = Matcher::vector_element_basic_type(this);
24148 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24149 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24150 %}
24151 ins_pipe( pipe_slow );
24152 %}
24153
24154 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24155 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24156 match(Set dst (CompressV src mask));
24157 match(Set dst (ExpandV src mask));
24158 format %{ "vector_compress_expand $dst, $src, $mask" %}
24159 ins_encode %{
24160 int opcode = this->ideal_Opcode();
24161 int vector_len = vector_length_encoding(this);
24162 BasicType bt = Matcher::vector_element_basic_type(this);
24163 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24164 %}
24165 ins_pipe( pipe_slow );
24166 %}
24167
24168 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24169 match(Set dst (CompressM mask));
24170 effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24171 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24172 ins_encode %{
24173 assert(this->in(1)->bottom_type()->isa_vectmask(), "");
24174 int mask_len = Matcher::vector_length(this);
24175 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24176 %}
24177 ins_pipe( pipe_slow );
24178 %}
24179
24180 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24181
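// Bit reversal takes one of two paths: a shuffle/shift sequence when GFNI is
// unavailable, and a single vgf2p8affineqb when it is. The 64-bit constant
// 0x8040201008040201 loaded below encodes the 8x8 bit matrix that, under the
// instruction's bit ordering, reverses the bit order within each byte.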
24182 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24183 predicate(!VM_Version::supports_gfni());
24184 match(Set dst (ReverseV src));
24185 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24186 format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24187 ins_encode %{
24188 int vec_enc = vector_length_encoding(this);
24189 BasicType bt = Matcher::vector_element_basic_type(this);
24190 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24191 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24192 %}
24193 ins_pipe( pipe_slow );
24194 %}
24195
24196 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24197 predicate(VM_Version::supports_gfni());
24198 match(Set dst (ReverseV src));
24199 effect(TEMP dst, TEMP xtmp);
24200 format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24201 ins_encode %{
24202 int vec_enc = vector_length_encoding(this);
24203 BasicType bt = Matcher::vector_element_basic_type(this);
24204 InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24205 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24206 $xtmp$$XMMRegister);
24207 %}
24208 ins_pipe( pipe_slow );
24209 %}
24210
24211 instruct vreverse_byte_reg(vec dst, vec src) %{
24212 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24213 match(Set dst (ReverseBytesV src));
24214 effect(TEMP dst);
24215 format %{ "vector_reverse_byte $dst, $src" %}
24216 ins_encode %{
24217 int vec_enc = vector_length_encoding(this);
24218 BasicType bt = Matcher::vector_element_basic_type(this);
24219 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24220 %}
24221 ins_pipe( pipe_slow );
24222 %}
24223
24224 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24225 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24226 match(Set dst (ReverseBytesV src));
24227 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24228 format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24229 ins_encode %{
24230 int vec_enc = vector_length_encoding(this);
24231 BasicType bt = Matcher::vector_element_basic_type(this);
24232 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24233 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24234 %}
24235 ins_pipe( pipe_slow );
24236 %}
24237
24238 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24239
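// With AVX-512CD, leading-zero counts for int/long elements map directly onto
// vplzcnt[d|q] (see is_clz_non_subword_predicate_evex); short and byte
// elements, and the AVX-only fallbacks further down, synthesize the count with
// the extra xmm/mask/gpr temporaries declared by each rule.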
24240 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24241 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24242 Matcher::vector_length_in_bytes(n->in(1))));
24243 match(Set dst (CountLeadingZerosV src));
24244 format %{ "vector_count_leading_zeros $dst, $src" %}
24245 ins_encode %{
24246 int vlen_enc = vector_length_encoding(this, $src);
24247 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24248 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24249 xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24250 %}
24251 ins_pipe( pipe_slow );
24252 %}
24253
24254 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24255 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24256 Matcher::vector_length_in_bytes(n->in(1))));
24257 match(Set dst (CountLeadingZerosV src mask));
24258 format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24259 ins_encode %{
24260 int vlen_enc = vector_length_encoding(this, $src);
24261 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24262 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24263 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24264 xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24265 %}
24266 ins_pipe( pipe_slow );
24267 %}
24268
24269 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24270 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24271 VM_Version::supports_avx512cd() &&
24272 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24273 match(Set dst (CountLeadingZerosV src));
24274 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24275 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24276 ins_encode %{
24277 int vlen_enc = vector_length_encoding(this, $src);
24278 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24279 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24280 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24281 %}
24282 ins_pipe( pipe_slow );
24283 %}
24284
24285 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24286 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24287 match(Set dst (CountLeadingZerosV src));
24288 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24289 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24290 ins_encode %{
24291 int vlen_enc = vector_length_encoding(this, $src);
24292 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24293 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24294 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24295 $rtmp$$Register, true, vlen_enc);
24296 %}
24297 ins_pipe( pipe_slow );
24298 %}
24299
24300 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24301 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24302 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24303 match(Set dst (CountLeadingZerosV src));
24304 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24305 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24306 ins_encode %{
24307 int vlen_enc = vector_length_encoding(this, $src);
24308 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24309 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24310 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24311 %}
24312 ins_pipe( pipe_slow );
24313 %}
24314
24315 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24316 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24317 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24318 match(Set dst (CountLeadingZerosV src));
24319 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24320 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24321 ins_encode %{
24322 int vlen_enc = vector_length_encoding(this, $src);
24323 BasicType bt = Matcher::vector_element_basic_type(this, $src);
24324 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24325 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24326 %}
24327 ins_pipe( pipe_slow );
24328 %}
24329
24330 // ---------------------------------- Vector Masked Operations ------------------------------------
24331
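// The masked arithmetic rules below all funnel into evmasked_op, which emits
// the EVEX-encoded instruction with the k-register applied as a merge mask:
// the trailing boolean (true here) selects merge- rather than zero-masking, so
// lanes with a clear mask bit keep their previous value in $dst.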
24332 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24333 match(Set dst (AddVB (Binary dst src2) mask));
24334 match(Set dst (AddVS (Binary dst src2) mask));
24335 match(Set dst (AddVI (Binary dst src2) mask));
24336 match(Set dst (AddVL (Binary dst src2) mask));
24337 match(Set dst (AddVF (Binary dst src2) mask));
24338 match(Set dst (AddVD (Binary dst src2) mask));
24339 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24340 ins_encode %{
24341 int vlen_enc = vector_length_encoding(this);
24342 BasicType bt = Matcher::vector_element_basic_type(this);
24343 int opc = this->ideal_Opcode();
24344 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24345 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24346 %}
24347 ins_pipe( pipe_slow );
24348 %}
24349
24350 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24351 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24352 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24353 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24354 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24355 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24356 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24357 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24358 ins_encode %{
24359 int vlen_enc = vector_length_encoding(this);
24360 BasicType bt = Matcher::vector_element_basic_type(this);
24361 int opc = this->ideal_Opcode();
24362 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24363 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24364 %}
24365 ins_pipe( pipe_slow );
24366 %}
24367
24368 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24369 match(Set dst (XorV (Binary dst src2) mask));
24370 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24371 ins_encode %{
24372 int vlen_enc = vector_length_encoding(this);
24373 BasicType bt = Matcher::vector_element_basic_type(this);
24374 int opc = this->ideal_Opcode();
24375 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24376 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24377 %}
24378 ins_pipe( pipe_slow );
24379 %}
24380
24381 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24382 match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24383 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24384 ins_encode %{
24385 int vlen_enc = vector_length_encoding(this);
24386 BasicType bt = Matcher::vector_element_basic_type(this);
24387 int opc = this->ideal_Opcode();
24388 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24389 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24390 %}
24391 ins_pipe( pipe_slow );
24392 %}
24393
24394 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24395 match(Set dst (OrV (Binary dst src2) mask));
24396 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24397 ins_encode %{
24398 int vlen_enc = vector_length_encoding(this);
24399 BasicType bt = Matcher::vector_element_basic_type(this);
24400 int opc = this->ideal_Opcode();
24401 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24402 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24403 %}
24404 ins_pipe( pipe_slow );
24405 %}
24406
24407 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24408 match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24409 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24410 ins_encode %{
24411 int vlen_enc = vector_length_encoding(this);
24412 BasicType bt = Matcher::vector_element_basic_type(this);
24413 int opc = this->ideal_Opcode();
24414 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24415 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24416 %}
24417 ins_pipe( pipe_slow );
24418 %}
24419
24420 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24421 match(Set dst (AndV (Binary dst src2) mask));
24422 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24423 ins_encode %{
24424 int vlen_enc = vector_length_encoding(this);
24425 BasicType bt = Matcher::vector_element_basic_type(this);
24426 int opc = this->ideal_Opcode();
24427 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24428 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24429 %}
24430 ins_pipe( pipe_slow );
24431 %}
24432
24433 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24434 match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24435 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24436 ins_encode %{
24437 int vlen_enc = vector_length_encoding(this);
24438 BasicType bt = Matcher::vector_element_basic_type(this);
24439 int opc = this->ideal_Opcode();
24440 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24441 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24442 %}
24443 ins_pipe( pipe_slow );
24444 %}
24445
24446 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24447 match(Set dst (SubVB (Binary dst src2) mask));
24448 match(Set dst (SubVS (Binary dst src2) mask));
24449 match(Set dst (SubVI (Binary dst src2) mask));
24450 match(Set dst (SubVL (Binary dst src2) mask));
24451 match(Set dst (SubVF (Binary dst src2) mask));
24452 match(Set dst (SubVD (Binary dst src2) mask));
24453 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24454 ins_encode %{
24455 int vlen_enc = vector_length_encoding(this);
24456 BasicType bt = Matcher::vector_element_basic_type(this);
24457 int opc = this->ideal_Opcode();
24458 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24459 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24460 %}
24461 ins_pipe( pipe_slow );
24462 %}
24463
24464 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24465 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24466 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24467 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24468 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24469 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24470 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24471 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24472 ins_encode %{
24473 int vlen_enc = vector_length_encoding(this);
24474 BasicType bt = Matcher::vector_element_basic_type(this);
24475 int opc = this->ideal_Opcode();
24476 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24477 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24478 %}
24479 ins_pipe( pipe_slow );
24480 %}
24481
24482 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24483 match(Set dst (MulVS (Binary dst src2) mask));
24484 match(Set dst (MulVI (Binary dst src2) mask));
24485 match(Set dst (MulVL (Binary dst src2) mask));
24486 match(Set dst (MulVF (Binary dst src2) mask));
24487 match(Set dst (MulVD (Binary dst src2) mask));
24488 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24489 ins_encode %{
24490 int vlen_enc = vector_length_encoding(this);
24491 BasicType bt = Matcher::vector_element_basic_type(this);
24492 int opc = this->ideal_Opcode();
24493 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24494 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24495 %}
24496 ins_pipe( pipe_slow );
24497 %}
24498
24499 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24500 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24501 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24502 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24503 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24504 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24505 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24506 ins_encode %{
24507 int vlen_enc = vector_length_encoding(this);
24508 BasicType bt = Matcher::vector_element_basic_type(this);
24509 int opc = this->ideal_Opcode();
24510 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24511 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24512 %}
24513 ins_pipe( pipe_slow );
24514 %}
24515
24516 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24517 match(Set dst (SqrtVF dst mask));
24518 match(Set dst (SqrtVD dst mask));
24519 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24520 ins_encode %{
24521 int vlen_enc = vector_length_encoding(this);
24522 BasicType bt = Matcher::vector_element_basic_type(this);
24523 int opc = this->ideal_Opcode();
24524 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24525 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24526 %}
24527 ins_pipe( pipe_slow );
24528 %}
24529
24530 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24531 match(Set dst (DivVF (Binary dst src2) mask));
24532 match(Set dst (DivVD (Binary dst src2) mask));
24533 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24534 ins_encode %{
24535 int vlen_enc = vector_length_encoding(this);
24536 BasicType bt = Matcher::vector_element_basic_type(this);
24537 int opc = this->ideal_Opcode();
24538 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24539 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24540 %}
24541 ins_pipe( pipe_slow );
24542 %}
24543
24544 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24545 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24546 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24547 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24548 ins_encode %{
24549 int vlen_enc = vector_length_encoding(this);
24550 BasicType bt = Matcher::vector_element_basic_type(this);
24551 int opc = this->ideal_Opcode();
24552 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24553 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24554 %}
24555 ins_pipe( pipe_slow );
24556 %}
24557
24558
24559 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24560 match(Set dst (RotateLeftV (Binary dst shift) mask));
24561 match(Set dst (RotateRightV (Binary dst shift) mask));
24562 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24563 ins_encode %{
24564 int vlen_enc = vector_length_encoding(this);
24565 BasicType bt = Matcher::vector_element_basic_type(this);
24566 int opc = this->ideal_Opcode();
24567 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24568 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24569 %}
24570 ins_pipe( pipe_slow );
24571 %}
24572
24573 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24574 match(Set dst (RotateLeftV (Binary dst src2) mask));
24575 match(Set dst (RotateRightV (Binary dst src2) mask));
24576 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24577 ins_encode %{
24578 int vlen_enc = vector_length_encoding(this);
24579 BasicType bt = Matcher::vector_element_basic_type(this);
24580 int opc = this->ideal_Opcode();
24581 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24582 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24583 %}
24584 ins_pipe( pipe_slow );
24585 %}
24586
24587 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24588 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24589 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24590 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24591 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24592 ins_encode %{
24593 int vlen_enc = vector_length_encoding(this);
24594 BasicType bt = Matcher::vector_element_basic_type(this);
24595 int opc = this->ideal_Opcode();
24596 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24597 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24598 %}
24599 ins_pipe( pipe_slow );
24600 %}
24601
24602 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24603 predicate(!n->as_ShiftV()->is_var_shift());
24604 match(Set dst (LShiftVS (Binary dst src2) mask));
24605 match(Set dst (LShiftVI (Binary dst src2) mask));
24606 match(Set dst (LShiftVL (Binary dst src2) mask));
24607 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24608 ins_encode %{
24609 int vlen_enc = vector_length_encoding(this);
24610 BasicType bt = Matcher::vector_element_basic_type(this);
24611 int opc = this->ideal_Opcode();
24612 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24613 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24614 %}
24615 ins_pipe( pipe_slow );
24616 %}
24617
24618 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24619 predicate(n->as_ShiftV()->is_var_shift());
24620 match(Set dst (LShiftVS (Binary dst src2) mask));
24621 match(Set dst (LShiftVI (Binary dst src2) mask));
24622 match(Set dst (LShiftVL (Binary dst src2) mask));
24623 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24624 ins_encode %{
24625 int vlen_enc = vector_length_encoding(this);
24626 BasicType bt = Matcher::vector_element_basic_type(this);
24627 int opc = this->ideal_Opcode();
24628 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24629 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24630 %}
24631 ins_pipe( pipe_slow );
24632 %}
24633
24634 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24635 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24636 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24637 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24638 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24639 ins_encode %{
24640 int vlen_enc = vector_length_encoding(this);
24641 BasicType bt = Matcher::vector_element_basic_type(this);
24642 int opc = this->ideal_Opcode();
24643 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24644 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24645 %}
24646 ins_pipe( pipe_slow );
24647 %}
24648
24649 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24650 predicate(!n->as_ShiftV()->is_var_shift());
24651 match(Set dst (RShiftVS (Binary dst src2) mask));
24652 match(Set dst (RShiftVI (Binary dst src2) mask));
24653 match(Set dst (RShiftVL (Binary dst src2) mask));
24654 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24655 ins_encode %{
24656 int vlen_enc = vector_length_encoding(this);
24657 BasicType bt = Matcher::vector_element_basic_type(this);
24658 int opc = this->ideal_Opcode();
24659 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24660 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24661 %}
24662 ins_pipe( pipe_slow );
24663 %}
24664
24665 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24666 predicate(n->as_ShiftV()->is_var_shift());
24667 match(Set dst (RShiftVS (Binary dst src2) mask));
24668 match(Set dst (RShiftVI (Binary dst src2) mask));
24669 match(Set dst (RShiftVL (Binary dst src2) mask));
24670 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24671 ins_encode %{
24672 int vlen_enc = vector_length_encoding(this);
24673 BasicType bt = Matcher::vector_element_basic_type(this);
24674 int opc = this->ideal_Opcode();
24675 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24676 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24677 %}
24678 ins_pipe( pipe_slow );
24679 %}
24680
24681 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24682 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24683 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24684 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24685 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24686 ins_encode %{
24687 int vlen_enc = vector_length_encoding(this);
24688 BasicType bt = Matcher::vector_element_basic_type(this);
24689 int opc = this->ideal_Opcode();
24690 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24691 $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24692 %}
24693 ins_pipe( pipe_slow );
24694 %}
24695
24696 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24697 predicate(!n->as_ShiftV()->is_var_shift());
24698 match(Set dst (URShiftVS (Binary dst src2) mask));
24699 match(Set dst (URShiftVI (Binary dst src2) mask));
24700 match(Set dst (URShiftVL (Binary dst src2) mask));
24701 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24702 ins_encode %{
24703 int vlen_enc = vector_length_encoding(this);
24704 BasicType bt = Matcher::vector_element_basic_type(this);
24705 int opc = this->ideal_Opcode();
24706 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24707 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24708 %}
24709 ins_pipe( pipe_slow );
24710 %}
24711
24712 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24713 predicate(n->as_ShiftV()->is_var_shift());
24714 match(Set dst (URShiftVS (Binary dst src2) mask));
24715 match(Set dst (URShiftVI (Binary dst src2) mask));
24716 match(Set dst (URShiftVL (Binary dst src2) mask));
24717 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24718 ins_encode %{
24719 int vlen_enc = vector_length_encoding(this);
24720 BasicType bt = Matcher::vector_element_basic_type(this);
24721 int opc = this->ideal_Opcode();
24722 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24723 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24724 %}
24725 ins_pipe( pipe_slow );
24726 %}
24727
24728 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24729 match(Set dst (MaxV (Binary dst src2) mask));
24730 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24731 ins_encode %{
24732 int vlen_enc = vector_length_encoding(this);
24733 BasicType bt = Matcher::vector_element_basic_type(this);
24734 int opc = this->ideal_Opcode();
24735 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24736 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24737 %}
24738 ins_pipe( pipe_slow );
24739 %}
24740
24741 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24742 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24743 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24744 ins_encode %{
24745 int vlen_enc = vector_length_encoding(this);
24746 BasicType bt = Matcher::vector_element_basic_type(this);
24747 int opc = this->ideal_Opcode();
24748 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24749 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24750 %}
24751 ins_pipe( pipe_slow );
24752 %}
24753
24754 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24755 match(Set dst (MinV (Binary dst src2) mask));
24756 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24757 ins_encode %{
24758 int vlen_enc = vector_length_encoding(this);
24759 BasicType bt = Matcher::vector_element_basic_type(this);
24760 int opc = this->ideal_Opcode();
24761 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24762 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24763 %}
24764 ins_pipe( pipe_slow );
24765 %}
24766
24767 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24768 match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24769 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24770 ins_encode %{
24771 int vlen_enc = vector_length_encoding(this);
24772 BasicType bt = Matcher::vector_element_basic_type(this);
24773 int opc = this->ideal_Opcode();
24774 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24775 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24776 %}
24777 ins_pipe( pipe_slow );
24778 %}
24779
24780 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24781 match(Set dst (VectorRearrange (Binary dst src2) mask));
24782 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24783 ins_encode %{
24784 int vlen_enc = vector_length_encoding(this);
24785 BasicType bt = Matcher::vector_element_basic_type(this);
24786 int opc = this->ideal_Opcode();
24787 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24788 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24789 %}
24790 ins_pipe( pipe_slow );
24791 %}
24792
24793 instruct vabs_masked(vec dst, kReg mask) %{
24794 match(Set dst (AbsVB dst mask));
24795 match(Set dst (AbsVS dst mask));
24796 match(Set dst (AbsVI dst mask));
24797 match(Set dst (AbsVL dst mask));
24798 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24799 ins_encode %{
24800 int vlen_enc = vector_length_encoding(this);
24801 BasicType bt = Matcher::vector_element_basic_type(this);
24802 int opc = this->ideal_Opcode();
24803 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24804 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24805 %}
24806 ins_pipe( pipe_slow );
24807 %}
24808
24809 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24810 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24811 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24812 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24813 ins_encode %{
24814 assert(UseFMA, "FMA instruction support is required.");
24815 int vlen_enc = vector_length_encoding(this);
24816 BasicType bt = Matcher::vector_element_basic_type(this);
24817 int opc = this->ideal_Opcode();
24818 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24819 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24820 %}
24821 ins_pipe( pipe_slow );
24822 %}
24823
24824 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24825 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24826 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24827 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24828 ins_encode %{
24829 assert(UseFMA, "FMA instruction support is required.");
24830 int vlen_enc = vector_length_encoding(this);
24831 BasicType bt = Matcher::vector_element_basic_type(this);
24832 int opc = this->ideal_Opcode();
24833 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24834 $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24835 %}
24836 ins_pipe( pipe_slow );
24837 %}
24838
24839 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24840 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24841 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24842 ins_encode %{
24843 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
24844 int vlen_enc = vector_length_encoding(this, $src1);
24845 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24846
24847 // Dispatch on the element type of src1 and emit the matching masked compare.
24848 switch (src1_elem_bt) {
24849 case T_BYTE: {
24850 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24851 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24852 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24853 break;
24854 }
24855 case T_SHORT: {
24856 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24857 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24858 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24859 break;
24860 }
24861 case T_INT: {
24862 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24863 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24864 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24865 break;
24866 }
24867 case T_LONG: {
24868 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24869 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24870 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24871 break;
24872 }
24873 case T_FLOAT: {
24874 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24875 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24876 break;
24877 }
24878 case T_DOUBLE: {
24879 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24880 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24881 break;
24882 }
24883 default: assert(false, "%s", type2name(src1_elem_bt)); break;
24884 }
24885 %}
24886 ins_pipe( pipe_slow );
24887 %}
24888
24889 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24890 predicate(Matcher::vector_length(n) <= 32);
24891 match(Set dst (MaskAll src));
24892 format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24893 ins_encode %{
24894 int mask_len = Matcher::vector_length(this);
24895 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24896 %}
24897 ins_pipe( pipe_slow );
24898 %}
24899
24900 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24901 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24902 match(Set dst (XorVMask src (MaskAll cnt)));
24903 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24904 format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
24905 ins_encode %{
24906 uint masklen = Matcher::vector_length(this);
24907 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24908 %}
24909 ins_pipe( pipe_slow );
24910 %}
24911
24912 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24913 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24914 (Matcher::vector_length(n) == 16) ||
24915 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24916 match(Set dst (XorVMask src (MaskAll cnt)));
24917 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24918 ins_encode %{
24919 uint masklen = Matcher::vector_length(this);
24920 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24921 %}
24922 ins_pipe( pipe_slow );
24923 %}
24924
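// VectorLongToMask: when the mask type is a genuine k-register mask the long
// is simply kmov'ed across; otherwise its bits are expanded into a boolean
// vector with vector_long_to_maskvec, using two gpr temps and, for lengths
// above 8, an additional xmm temp.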
24925 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
24926 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
24927 match(Set dst (VectorLongToMask src));
24928 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
24929 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
24930 ins_encode %{
24931 int mask_len = Matcher::vector_length(this);
24932 int vec_enc = vector_length_encoding(mask_len);
24933 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24934 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24935 %}
24936 ins_pipe( pipe_slow );
24937 %}
24938
24939
24940 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24941 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
24942 match(Set dst (VectorLongToMask src));
24943 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24944 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
24945 ins_encode %{
24946 int mask_len = Matcher::vector_length(this);
24947 assert(mask_len <= 32, "invalid mask length");
24948 int vec_enc = vector_length_encoding(mask_len);
24949 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24950 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24951 %}
24952 ins_pipe( pipe_slow );
24953 %}
24954
24955 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24956 predicate(n->bottom_type()->isa_vectmask());
24957 match(Set dst (VectorLongToMask src));
24958 format %{ "long_to_mask_evex $dst, $src\t!" %}
24959 ins_encode %{
24960 __ kmov($dst$$KRegister, $src$$Register);
24961 %}
24962 ins_pipe( pipe_slow );
24963 %}
24964
24965 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24966 match(Set dst (AndVMask src1 src2));
24967 match(Set dst (OrVMask src1 src2));
24968 match(Set dst (XorVMask src1 src2));
24969 effect(TEMP kscratch);
24970 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24971 ins_encode %{
24972 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24973 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24974 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24975 uint masklen = Matcher::vector_length(this);
24976 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; // no 8-bit k-register ops without AVX512DQ
24977 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24978 %}
24979 ins_pipe( pipe_slow );
24980 %}
24981
24982 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24983 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24984 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24985 ins_encode %{
24986 int vlen_enc = vector_length_encoding(this);
24987 BasicType bt = Matcher::vector_element_basic_type(this);
24988 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24989 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24990 %}
24991 ins_pipe( pipe_slow );
24992 %}
24993
24994 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24995 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24996 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24997 ins_encode %{
24998 int vlen_enc = vector_length_encoding(this);
24999 BasicType bt = Matcher::vector_element_basic_type(this);
25000 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25001 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25002 %}
25003 ins_pipe( pipe_slow );
25004 %}
25005
25006 instruct castMM(kReg dst)
25007 %{
25008 match(Set dst (CastVV dst));
25009
25010 size(0);
25011 format %{ "# castVV of $dst" %}
25012 ins_encode(/* empty encoding */);
25013 ins_cost(0);
25014 ins_pipe(empty);
25015 %}
25016
25017 instruct castVV(vec dst)
25018 %{
25019 match(Set dst (CastVV dst));
25020
25021 size(0);
25022 format %{ "# castVV of $dst" %}
25023 ins_encode(/* empty encoding */);
25024 ins_cost(0);
25025 ins_pipe(empty);
25026 %}
25027
25028 instruct castVVLeg(legVec dst)
25029 %{
25030 match(Set dst (CastVV dst));
25031
25032 size(0);
25033 format %{ "# castVV of $dst" %}
25034 ins_encode(/* empty encoding */);
25035 ins_cost(0);
25036 ins_pipe(empty);
25037 %}
25038
25039 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25040 %{
25041 match(Set dst (IsInfiniteF src));
25042 effect(TEMP ktmp, KILL cr);
25043 format %{ "float_class_check $dst, $src" %}
25044 ins_encode %{
25045 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); // 0x18 selects +Inf (0x08) and -Inf (0x10)
25046 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25047 %}
25048 ins_pipe(pipe_slow);
25049 %}
25050
25051 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25052 %{
25053 match(Set dst (IsInfiniteD src));
25054 effect(TEMP ktmp, KILL cr);
25055 format %{ "double_class_check $dst, $src" %}
25056 ins_encode %{
25057 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); // 0x18 selects +Inf (0x08) and -Inf (0x10)
25058 __ kmovbl($dst$$Register, $ktmp$$KRegister);
25059 %}
25060 ins_pipe(pipe_slow);
25061 %}
25062
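// Saturating add/sub: byte and short lanes have native saturating instructions
// and are handled by the two subword rules below; int and long lanes have no
// such instructions and are emulated instead, either with opmask registers
// (EVEX variant) or with extra xmm temporaries (AVX variant).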
25063 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25064 %{
25065 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25066 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25067 match(Set dst (SaturatingAddV src1 src2));
25068 match(Set dst (SaturatingSubV src1 src2));
25069 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25070 ins_encode %{
25071 int vlen_enc = vector_length_encoding(this);
25072 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25073 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25074 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25075 %}
25076 ins_pipe(pipe_slow);
25077 %}
25078
25079 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25080 %{
25081 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25082 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25083 match(Set dst (SaturatingAddV src1 src2));
25084 match(Set dst (SaturatingSubV src1 src2));
25085 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25086 ins_encode %{
25087 int vlen_enc = vector_length_encoding(this);
25088 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25089 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25090 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25091 %}
25092 ins_pipe(pipe_slow);
25093 %}
25094
25095 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25096 %{
25097 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25098 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25099 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25100 match(Set dst (SaturatingAddV src1 src2));
25101 match(Set dst (SaturatingSubV src1 src2));
25102 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25103 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25104 ins_encode %{
25105 int vlen_enc = vector_length_encoding(this);
25106 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25107 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25108 $src1$$XMMRegister, $src2$$XMMRegister,
25109 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25110 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25111 %}
25112 ins_pipe(pipe_slow);
25113 %}
25114
25115 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25116 %{
25117 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25118 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25119 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25120 match(Set dst (SaturatingAddV src1 src2));
25121 match(Set dst (SaturatingSubV src1 src2));
25122 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25123 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25124 ins_encode %{
25125 int vlen_enc = vector_length_encoding(this);
25126 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25127 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25128 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25129 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25130 %}
25131 ins_pipe(pipe_slow);
25132 %}
25133
25134 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25135 %{
25136 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25137 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25138 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25139 match(Set dst (SaturatingAddV src1 src2));
25140 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25141 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25142 ins_encode %{
25143 int vlen_enc = vector_length_encoding(this);
25144 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25145 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25146 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25147 %}
25148 ins_pipe(pipe_slow);
25149 %}
25150
25151 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25152 %{
25153 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25154 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25155 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25156 match(Set dst (SaturatingAddV src1 src2));
25157 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25158 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25159 ins_encode %{
25160 int vlen_enc = vector_length_encoding(this);
25161 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25162 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25163 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25164 %}
25165 ins_pipe(pipe_slow);
25166 %}
25167
25168 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25169 %{
25170 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25171 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25172 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25173 match(Set dst (SaturatingSubV src1 src2));
25174 effect(TEMP ktmp);
25175 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25176 ins_encode %{
25177 int vlen_enc = vector_length_encoding(this);
25178 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25179 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25180 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25181 %}
25182 ins_pipe(pipe_slow);
25183 %}
25184
25185 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25186 %{
25187 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25188 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25189 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25190 match(Set dst (SaturatingSubV src1 src2));
25191 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25192 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25193 ins_encode %{
25194 int vlen_enc = vector_length_encoding(this);
25195 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25196 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25197 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25198 %}
25199 ins_pipe(pipe_slow);
25200 %}
25201
25202 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25203 %{
25204 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25205 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25206 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25207 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25208 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25209 ins_encode %{
25210 int vlen_enc = vector_length_encoding(this);
25211 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25212 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25213 $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25214 %}
25215 ins_pipe(pipe_slow);
25216 %}
25217
25218 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25219 %{
25220 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25221 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25222 match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25223 match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25224 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25225 ins_encode %{
25226 int vlen_enc = vector_length_encoding(this);
25227 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25228 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25229 $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25230 %}
25231 ins_pipe(pipe_slow);
25232 %}
25233
25234 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25235 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25236 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25237 match(Set dst (SaturatingAddV (Binary dst src) mask));
25238 match(Set dst (SaturatingSubV (Binary dst src) mask));
25239 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25240 ins_encode %{
25241 int vlen_enc = vector_length_encoding(this);
25242 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25243 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25244 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25245 %}
25246 ins_pipe( pipe_slow );
25247 %}
25248
25249 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25250 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25251 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25252 match(Set dst (SaturatingAddV (Binary dst src) mask));
25253 match(Set dst (SaturatingSubV (Binary dst src) mask));
25254 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25255 ins_encode %{
25256 int vlen_enc = vector_length_encoding(this);
25257 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25258 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25259 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25260 %}
25261 ins_pipe( pipe_slow );
25262 %}
25263
25264 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25265 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25266 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25267 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25268 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25269 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25270 ins_encode %{
25271 int vlen_enc = vector_length_encoding(this);
25272 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25273 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25274 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25275 %}
25276 ins_pipe( pipe_slow );
25277 %}
25278
25279 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25280 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25281 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25282 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25283 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25284 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25285 ins_encode %{
25286 int vlen_enc = vector_length_encoding(this);
25287 BasicType elem_bt = Matcher::vector_element_basic_type(this);
25288 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25289 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25290 %}
25291 ins_pipe( pipe_slow );
25292 %}
25293
25294 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25295 %{
25296 match(Set index (SelectFromTwoVector (Binary index src1) src2));
25297 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25298 ins_encode %{
25299 int vlen_enc = vector_length_encoding(this);
25300 BasicType bt = Matcher::vector_element_basic_type(this);
25301 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25302 %}
25303 ins_pipe(pipe_slow);
25304 %}
25305
25306 instruct reinterpretS2HF(regF dst, rRegI src)
25307 %{
25308 match(Set dst (ReinterpretS2HF src));
25309 format %{ "vmovw $dst, $src" %}
25310 ins_encode %{
25311 __ vmovw($dst$$XMMRegister, $src$$Register);
25312 %}
25313 ins_pipe(pipe_slow);
25314 %}
25315
25316 instruct reinterpretHF2S(rRegI dst, regF src)
25317 %{
25318 match(Set dst (ReinterpretHF2S src));
25319 format %{ "vmovw $dst, $src" %}
25320 ins_encode %{
25321 __ vmovw($dst$$Register, $src$$XMMRegister);
25322 %}
25323 ins_pipe(pipe_slow);
25324 %}
25325
25326 instruct convF2HFAndS2HF(regF dst, regF src)
25327 %{
25328 match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25329 format %{ "convF2HFAndS2HF $dst, $src" %}
25330 ins_encode %{
25331 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25332 %}
25333 ins_pipe(pipe_slow);
25334 %}
25335
25336 instruct convHF2SAndHF2F(regF dst, regF src)
25337 %{
25338 match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25339 format %{ "convHF2SAndHF2F $dst, $src" %}
25340 ins_encode %{
25341 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25342 %}
25343 ins_pipe(pipe_slow);
25344 %}
25345
25346 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25347 %{
25348 match(Set dst (SqrtHF src));
25349 format %{ "scalar_sqrt_fp16 $dst, $src" %}
25350 ins_encode %{
25351 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25352 %}
25353 ins_pipe(pipe_slow);
25354 %}
25355
25356 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25357 %{
25358 match(Set dst (AddHF src1 src2));
25359 match(Set dst (DivHF src1 src2));
25360 match(Set dst (MulHF src1 src2));
25361 match(Set dst (SubHF src1 src2));
25362 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25363 ins_encode %{
25364 int opcode = this->ideal_Opcode();
25365 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25366 %}
25367 ins_pipe(pipe_slow);
25368 %}
25369
25370 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25371 %{
25372 predicate(VM_Version::supports_avx10_2());
25373 match(Set dst (MaxHF src1 src2));
25374 match(Set dst (MinHF src1 src2));
25375 format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25376 ins_encode %{
25377 int function = this->ideal_Opcode() == Op_MinHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25378 __ eminmaxsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, function);
25379 %}
25380 ins_pipe( pipe_slow );
25381 %}
25382
25383 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25384 %{
25385 predicate(!VM_Version::supports_avx10_2());
25386 match(Set dst (MaxHF src1 src2));
25387 match(Set dst (MinHF src1 src2));
25388 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25389 format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25390 ins_encode %{
25391 int opcode = this->ideal_Opcode();
25392 __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25393 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25394 %}
25395 ins_pipe( pipe_slow );
25396 %}
25397
25398 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25399 %{
25400 match(Set dst (FmaHF src2 (Binary dst src1)));
25401 effect(DEF dst);
25402 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25403 ins_encode %{
25404 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25405 %}
25406 ins_pipe( pipe_slow );
25407 %}
25408
25409
25410 instruct vector_sqrt_HF_reg(vec dst, vec src)
25411 %{
25412 match(Set dst (SqrtVHF src));
25413 format %{ "vector_sqrt_fp16 $dst, $src" %}
25414 ins_encode %{
25415 int vlen_enc = vector_length_encoding(this);
25416 __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25417 %}
25418 ins_pipe(pipe_slow);
25419 %}
25420
25421 instruct vector_sqrt_HF_mem(vec dst, memory src)
25422 %{
25423 match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25424 format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25425 ins_encode %{
25426 int vlen_enc = vector_length_encoding(this);
25427 __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25428 %}
25429 ins_pipe(pipe_slow);
25430 %}
25431
25432 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25433 %{
25434 match(Set dst (AddVHF src1 src2));
25435 match(Set dst (DivVHF src1 src2));
25436 match(Set dst (MulVHF src1 src2));
25437 match(Set dst (SubVHF src1 src2));
25438 format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25439 ins_encode %{
25440 int vlen_enc = vector_length_encoding(this);
25441 int opcode = this->ideal_Opcode();
25442 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25443 %}
25444 ins_pipe(pipe_slow);
25445 %}
25446
25447
25448 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25449 %{
25450 match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25451 match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25452 match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25453 match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25454 format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25455 ins_encode %{
25456 int vlen_enc = vector_length_encoding(this);
25457 int opcode = this->ideal_Opcode();
25458 __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25459 %}
25460 ins_pipe(pipe_slow);
25461 %}
25462
25463 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25464 %{
25465 match(Set dst (FmaVHF src2 (Binary dst src1)));
25466 format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25467 ins_encode %{
25468 int vlen_enc = vector_length_encoding(this);
25469 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25470 %}
25471 ins_pipe( pipe_slow );
25472 %}
25473
25474 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25475 %{
25476 match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25477 format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25478 ins_encode %{
25479 int vlen_enc = vector_length_encoding(this);
25480 __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25481 %}
25482 ins_pipe( pipe_slow );
25483 %}
25484
25485 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25486 %{
25487 predicate(VM_Version::supports_avx10_2());
25488 match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25489 match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25490 format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25491 ins_encode %{
25492 int vlen_enc = vector_length_encoding(this);
25493 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25494 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$Address, true, function, vlen_enc);
25495 %}
25496 ins_pipe( pipe_slow );
25497 %}
25498
25499 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25500 %{
25501 predicate(VM_Version::supports_avx10_2());
25502 match(Set dst (MinVHF src1 src2));
25503 match(Set dst (MaxVHF src1 src2));
25504 format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25505 ins_encode %{
25506 int vlen_enc = vector_length_encoding(this);
25507 int function = this->ideal_Opcode() == Op_MinVHF ? AVX10_2_MINMAX_MIN_COMPARE_SIGN : AVX10_2_MINMAX_MAX_COMPARE_SIGN;
25508 __ evminmaxph($dst$$XMMRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, true, function, vlen_enc);
25509 %}
25510 ins_pipe( pipe_slow );
25511 %}
25512
25513 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25514 %{
25515 predicate(!VM_Version::supports_avx10_2());
25516 match(Set dst (MinVHF src1 src2));
25517 match(Set dst (MaxVHF src1 src2));
25518 effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25519 format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25520 ins_encode %{
25521 int vlen_enc = vector_length_encoding(this);
25522 int opcode = this->ideal_Opcode();
25523 __ vector_max_min_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25524 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25525 %}
25526 ins_pipe( pipe_slow );
25527 %}
25528
25529 //----------PEEPHOLE RULES-----------------------------------------------------
25530 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
25532 //
25533 // peeppredicate ( rule_predicate );
// // the peephole rule is ignored unless this predicate evaluates to true
25535 //
25536 // peepmatch ( root_instr_name [preceding_instruction]* );
25537 //
25538 // peepprocedure ( procedure_name );
// // provide the name of a procedure that performs the optimization; the
// // procedure should reside in the architecture-dependent peephole file and
// // have the signature
// // MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside
// // the block, the register allocator, the functions that, when invoked,
// // return a new node defined in peepreplace, and the rules of the nodes
// // appearing in the corresponding peepmatch; the procedure returns true if
// // successful, else false
25547 //
25548 // peepconstraint %{
25549 // (instruction_number.operand_name relational_op instruction_number.operand_name
25550 // [, ...] );
// // instruction numbers are zero-based, following left-to-right order in peepmatch
25552 //
25553 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25554 // // provide an instruction_number.operand_name for each operand that appears
25555 // // in the replacement instruction's match rule
25556 //
25557 // ---------VM FLAGS---------------------------------------------------------
25558 //
25559 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25560 //
25561 // Each peephole rule is given an identifying number starting with zero and
25562 // increasing by one in the order seen by the parser. An individual peephole
25563 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25564 // on the command-line.
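//
// For example (illustrative only; assumes a VM build in which these C2 flags
// are available):
//   -XX:-OptoPeephole        turn off all peephole rules
//   -XX:OptoPeepholeAt=3     apply only the rule numbered 3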
25565 //
25566 // ---------CURRENT LIMITATIONS----------------------------------------------
25567 //
25568 // Only transformations inside a basic block (do we need more for peephole)
25569 //
25570 // ---------EXAMPLE----------------------------------------------------------
25571 //
25572 // // pertinent parts of existing instructions in architecture description
25573 // instruct movI(rRegI dst, rRegI src)
25574 // %{
25575 // match(Set dst (CopyI src));
25576 // %}
25577 //
25578 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25579 // %{
25580 // match(Set dst (AddI dst src));
25581 // effect(KILL cr);
25582 // %}
25583 //
25584 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25585 // %{
25586 // match(Set dst (AddI dst src));
25587 // %}
25588 //
25589 // 1. Simple replacement
25590 // - Only match adjacent instructions in same basic block
25591 // - Only equality constraints
25592 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25593 // - Only one replacement instruction
25594 //
25595 // // Change (inc mov) to lea
25596 // peephole %{
25597 // // lea should only be emitted when beneficial
25598 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25599 // // increment preceded by register-register move
25600 // peepmatch ( incI_rReg movI );
25601 // // require that the destination register of the increment
25602 // // match the destination register of the move
25603 // peepconstraint ( 0.dst == 1.dst );
25604 // // construct a replacement instruction that sets
25605 // // the destination to ( move's source register + one )
25606 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25607 // %}
25608 //
25609 // 2. Procedural replacement
// - More flexibility in finding relevant nodes
25611 // - More flexible constraints
25612 // - More flexible transformations
25613 // - May utilise architecture-dependent API more effectively
25614 // - Currently only one replacement instruction due to adlc parsing capabilities
25615 //
25616 // // Change (inc mov) to lea
25617 // peephole %{
25618 // // lea should only be emitted when beneficial
25619 // peeppredicate( VM_Version::supports_fast_2op_lea() );
// // the rule numbers of the nodes matched here are passed into the procedure below
25621 // peepmatch ( incI_rReg movI );
// // the procedure that performs the transformation
25623 // peepprocedure ( inc_mov_to_lea );
// // the replacement is a leaI_rReg_immI; a lambda that, when invoked, creates
// // this node is passed into the procedure above
25626 // peepreplace ( leaI_rReg_immI() );
25627 // %}
25628
// These instructions are not matched by the matcher but are used by the peephole rules below
25630 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25631 %{
25632 predicate(false);
25633 match(Set dst (AddI src1 src2));
25634 format %{ "leal $dst, [$src1 + $src2]" %}
25635 ins_encode %{
25636 Register dst = $dst$$Register;
25637 Register src1 = $src1$$Register;
25638 Register src2 = $src2$$Register;
25639 if (src1 != rbp && src1 != r13) {
25640 __ leal(dst, Address(src1, src2, Address::times_1));
25641 } else {
25642 assert(src2 != rbp && src2 != r13, "");
25643 __ leal(dst, Address(src2, src1, Address::times_1));
25644 }
25645 %}
25646 ins_pipe(ialu_reg_reg);
25647 %}
25648
25649 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25650 %{
25651 predicate(false);
25652 match(Set dst (AddI src1 src2));
25653 format %{ "leal $dst, [$src1 + $src2]" %}
25654 ins_encode %{
25655 __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25656 %}
25657 ins_pipe(ialu_reg_reg);
25658 %}
25659
25660 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25661 %{
25662 predicate(false);
25663 match(Set dst (LShiftI src shift));
25664 format %{ "leal $dst, [$src << $shift]" %}
25665 ins_encode %{
25666 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25667 Register src = $src$$Register;
25668 if (scale == Address::times_2 && src != rbp && src != r13) {
25669 __ leal($dst$$Register, Address(src, src, Address::times_1));
25670 } else {
25671 __ leal($dst$$Register, Address(noreg, src, scale));
25672 }
25673 %}
25674 ins_pipe(ialu_reg_reg);
25675 %}
25676
25677 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25678 %{
25679 predicate(false);
25680 match(Set dst (AddL src1 src2));
25681 format %{ "leaq $dst, [$src1 + $src2]" %}
25682 ins_encode %{
25683 Register dst = $dst$$Register;
25684 Register src1 = $src1$$Register;
25685 Register src2 = $src2$$Register;
25686 if (src1 != rbp && src1 != r13) {
25687 __ leaq(dst, Address(src1, src2, Address::times_1));
25688 } else {
25689 assert(src2 != rbp && src2 != r13, "");
25690 __ leaq(dst, Address(src2, src1, Address::times_1));
25691 }
25692 %}
25693 ins_pipe(ialu_reg_reg);
25694 %}
25695
25696 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25697 %{
25698 predicate(false);
25699 match(Set dst (AddL src1 src2));
25700 format %{ "leaq $dst, [$src1 + $src2]" %}
25701 ins_encode %{
25702 __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25703 %}
25704 ins_pipe(ialu_reg_reg);
25705 %}
25706
25707 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25708 %{
25709 predicate(false);
25710 match(Set dst (LShiftL src shift));
25711 format %{ "leaq $dst, [$src << $shift]" %}
25712 ins_encode %{
25713 Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25714 Register src = $src$$Register;
25715 if (scale == Address::times_2 && src != rbp && src != r13) {
25716 __ leaq($dst$$Register, Address(src, src, Address::times_1));
25717 } else {
25718 __ leaq($dst$$Register, Address(noreg, src, scale));
25719 }
25720 %}
25721 ins_pipe(ialu_reg_reg);
25722 %}
25723
25724 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25725 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25726 // processors with at least partial ALU support for lea
25727 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25728 // beneficial for processors with full ALU support
25729 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
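//
// For illustration only (the register choices below are hypothetical), the
// reg-reg rule turns a sequence such as
//   movl  eax, esi        // movI
//   addl  eax, edi        // addI_rReg
// into a single
//   leal  eax, [rsi + rdi]
// provided the constraints checked in lea_coalesce_reg are satisfied.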
25730
25731 peephole
25732 %{
25733 peeppredicate(VM_Version::supports_fast_2op_lea());
25734 peepmatch (addI_rReg);
25735 peepprocedure (lea_coalesce_reg);
25736 peepreplace (leaI_rReg_rReg_peep());
25737 %}
25738
25739 peephole
25740 %{
25741 peeppredicate(VM_Version::supports_fast_2op_lea());
25742 peepmatch (addI_rReg_imm);
25743 peepprocedure (lea_coalesce_imm);
25744 peepreplace (leaI_rReg_immI_peep());
25745 %}
25746
25747 peephole
25748 %{
25749 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25750 VM_Version::is_intel_cascade_lake());
25751 peepmatch (incI_rReg);
25752 peepprocedure (lea_coalesce_imm);
25753 peepreplace (leaI_rReg_immI_peep());
25754 %}
25755
25756 peephole
25757 %{
25758 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25759 VM_Version::is_intel_cascade_lake());
25760 peepmatch (decI_rReg);
25761 peepprocedure (lea_coalesce_imm);
25762 peepreplace (leaI_rReg_immI_peep());
25763 %}
25764
25765 peephole
25766 %{
25767 peeppredicate(VM_Version::supports_fast_2op_lea());
25768 peepmatch (salI_rReg_immI2);
25769 peepprocedure (lea_coalesce_imm);
25770 peepreplace (leaI_rReg_immI2_peep());
25771 %}
25772
25773 peephole
25774 %{
25775 peeppredicate(VM_Version::supports_fast_2op_lea());
25776 peepmatch (addL_rReg);
25777 peepprocedure (lea_coalesce_reg);
25778 peepreplace (leaL_rReg_rReg_peep());
25779 %}
25780
25781 peephole
25782 %{
25783 peeppredicate(VM_Version::supports_fast_2op_lea());
25784 peepmatch (addL_rReg_imm);
25785 peepprocedure (lea_coalesce_imm);
25786 peepreplace (leaL_rReg_immL32_peep());
25787 %}
25788
25789 peephole
25790 %{
25791 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25792 VM_Version::is_intel_cascade_lake());
25793 peepmatch (incL_rReg);
25794 peepprocedure (lea_coalesce_imm);
25795 peepreplace (leaL_rReg_immL32_peep());
25796 %}
25797
25798 peephole
25799 %{
25800 peeppredicate(VM_Version::supports_fast_3op_lea() ||
25801 VM_Version::is_intel_cascade_lake());
25802 peepmatch (decL_rReg);
25803 peepprocedure (lea_coalesce_imm);
25804 peepreplace (leaL_rReg_immL32_peep());
25805 %}
25806
25807 peephole
25808 %{
25809 peeppredicate(VM_Version::supports_fast_2op_lea());
25810 peepmatch (salL_rReg_immI2);
25811 peepprocedure (lea_coalesce_imm);
25812 peepreplace (leaL_rReg_immI2_peep());
25813 %}
25814
25815 peephole
25816 %{
25817 peepmatch (leaPCompressedOopOffset);
25818 peepprocedure (lea_remove_redundant);
25819 %}
25820
25821 peephole
25822 %{
25823 peepmatch (leaP8Narrow);
25824 peepprocedure (lea_remove_redundant);
25825 %}
25826
25827 peephole
25828 %{
25829 peepmatch (leaP32Narrow);
25830 peepprocedure (lea_remove_redundant);
25831 %}
25832
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant if the downstream instructions (like JCC or CMOV) only use
// flags that are already set by the previous instruction.
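//
// For illustration only (registers and label are hypothetical), in a sequence
// such as
//   andl  eax, ebx        // already sets ZF/SF from the result
//   testl eax, eax        // redundant
//   je    done
// test_may_remove can drop the testl when every flag consumer only needs flags
// already produced by the preceding instruction.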
25835
// int variant
25837 peephole
25838 %{
25839 peepmatch (testI_reg);
25840 peepprocedure (test_may_remove);
25841 %}
25842
// long variant
25844 peephole
25845 %{
25846 peepmatch (testL_reg);
25847 peepprocedure (test_may_remove);
25848 %}
25849
25850
25851 //----------SMARTSPILL RULES---------------------------------------------------
25852 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.