1 //
2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 //
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
8 //
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
14 //
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 //
19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 // or visit www.oracle.com if you need additional information or have any
21 // questions.
22 //
23 //
24
25 // X86 AMD64 Architecture Description File
26
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
30 // architecture.
31
32 register %{
33 //----------Architecture Description Register Definitions----------------------
34 // General Registers
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding, VMReg );
37 // Register Save Types:
38 //
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
42 //
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
46 //
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
50 // sites.
51 //
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
55 //
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
59 //
60 // The encoding number is the actual bit-pattern placed into the opcodes.
61
62 // General Registers
63 // R8-R15 must be encoded with REX. (RSP, RBP, RSI, RDI need REX when
64 // used as byte registers)
65
66 // Previously set RBX, RSI, and RDI as save-on-entry for java code
67 // Turn off SOE in java-code due to frequent use of uncommon-traps.
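68 // Now that allocator is better, turn on RSI and RDI as SOE registers. NOTE(review): the reg_defs below give RSI and RDI a register save type of SOC (SOE appears only as their C convention save type under _WIN64), so this remark looks stale -- confirm.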
69
// General registers RAX, RCX, RDX, RBX, RSP and RBP (encodings 0-5).
// Each register is described by two reg_def entries: the base name, bound to
// <reg>->as_VMReg(), and an _H entry bound to the following VMReg slot
// (->next()). Presumably the _H entry stands for the upper half of the 64-bit
// register -- TODO confirm against the VMReg layout.
// Save types in this group (register save type, C convention save type):
//   RAX, RCX, RDX: SOC, SOC
//   RBX:           SOC, SOE
//   RSP:           NS,  NS
//   RBP:           NS,  SOE
70 reg_def RAX (SOC, SOC, Op_RegI, 0, rax->as_VMReg());
71 reg_def RAX_H(SOC, SOC, Op_RegI, 0, rax->as_VMReg()->next());
72
73 reg_def RCX (SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
74 reg_def RCX_H(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()->next());
75
76 reg_def RDX (SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
77 reg_def RDX_H(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()->next());
78
79 reg_def RBX (SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
80 reg_def RBX_H(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()->next());
81
82 reg_def RSP (NS, NS, Op_RegI, 4, rsp->as_VMReg());
83 reg_def RSP_H(NS, NS, Op_RegI, 4, rsp->as_VMReg()->next());
84
85 // now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
86 reg_def RBP (NS, SOE, Op_RegI, 5, rbp->as_VMReg());
87 reg_def RBP_H(NS, SOE, Op_RegI, 5, rbp->as_VMReg()->next());
88
// RSI and RDI (encodings 6 and 7). The two preprocessor branches differ only in
// the C convention save type: SOE when _WIN64 is defined, SOC otherwise. The
// register save type (first argument) is SOC in both branches.
89 #ifdef _WIN64
90
91 reg_def RSI (SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
92 reg_def RSI_H(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()->next());
93
94 reg_def RDI (SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
95 reg_def RDI_H(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()->next());
96
97 #else
98
99 reg_def RSI (SOC, SOC, Op_RegI, 6, rsi->as_VMReg());
100 reg_def RSI_H(SOC, SOC, Op_RegI, 6, rsi->as_VMReg()->next());
101
102 reg_def RDI (SOC, SOC, Op_RegI, 7, rdi->as_VMReg());
103 reg_def RDI_H(SOC, SOC, Op_RegI, 7, rdi->as_VMReg()->next());
104
105 #endif
106
// R8-R15 (encodings 8-15), each with a base entry and an _H entry on the
// following VMReg slot (->next()).
// All use SOC as the register save type. The C convention save type is SOC
// for R8-R11 and SOE for R12-R15.
107 reg_def R8 (SOC, SOC, Op_RegI, 8, r8->as_VMReg());
108 reg_def R8_H (SOC, SOC, Op_RegI, 8, r8->as_VMReg()->next());
109
110 reg_def R9 (SOC, SOC, Op_RegI, 9, r9->as_VMReg());
111 reg_def R9_H (SOC, SOC, Op_RegI, 9, r9->as_VMReg()->next());
112
113 reg_def R10 (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
115
116 reg_def R11 (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
118
119 reg_def R12 (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
121
122 reg_def R13 (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
124
125 reg_def R14 (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
127
128 reg_def R15 (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
130
// R16-R31 (encodings 16-31), each with a base entry and an _H entry on the
// following VMReg slot (->next()). Every entry is SOC for both the register
// save type and the C convention save type.
// NOTE(review): presumably these are the APX extended general-purpose
// registers -- confirm; nothing in this section says when they are usable.
131 reg_def R16 (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
133
134 reg_def R17 (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
136
137 reg_def R18 (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
139
140 reg_def R19 (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
142
143 reg_def R20 (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
145
146 reg_def R21 (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
148
149 reg_def R22 (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
151
152 reg_def R23 (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
154
155 reg_def R24 (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
157
158 reg_def R25 (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
160
161 reg_def R26 (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
163
164 reg_def R27 (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
166
167 reg_def R28 (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
169
170 reg_def R29 (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
172
173 reg_def R30 (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
175
176 reg_def R31 (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
178
179 // Floating Point Registers
180
181 // Specify priority of register selection within phases of register
182 // allocation. Highest priority is first. A useful heuristic is to
183 // give registers a low priority when they are required by machine
184 // instructions, like EAX and EDX on I486, and choose no-save registers
185 // before save-on-call, & save-on-call before save-on-entry. Registers
186 // which participate in fixed calling sequences should come last.
187 // Registers which are used as pairs must fall on an even boundary.
188
// chunk0: allocation class holding every general register defined above,
// each listed together with its _H entry so the pair stays adjacent.
// Entries appear in selection-priority order (highest priority first):
// R10, R11, R8, R9, R12, then RCX, RBX, RDI, RDX, RSI, RAX, RBP, then
// R13-R15, then R16-R31, with RSP/RSP_H placed last.
189 alloc_class chunk0(R10, R10_H,
190 R11, R11_H,
191 R8, R8_H,
192 R9, R9_H,
193 R12, R12_H,
194 RCX, RCX_H,
195 RBX, RBX_H,
196 RDI, RDI_H,
197 RDX, RDX_H,
198 RSI, RSI_H,
199 RAX, RAX_H,
200 RBP, RBP_H,
201 R13, R13_H,
202 R14, R14_H,
203 R15, R15_H,
204 R16, R16_H,
205 R17, R17_H,
206 R18, R18_H,
207 R19, R19_H,
208 R20, R20_H,
209 R21, R21_H,
210 R22, R22_H,
211 R23, R23_H,
212 R24, R24_H,
213 R25, R25_H,
214 R26, R26_H,
215 R27, R27_H,
216 R28, R28_H,
217 R29, R29_H,
218 R30, R30_H,
219 R31, R31_H,
220 RSP, RSP_H);
221
222 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
223 // Word a in each register holds a Float, words ab hold a Double.
224 // The whole registers are used in SSE4.2 version intrinsics,
225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
226 // UseXMMForArrayCopy and UseSuperword flags).
227 // For pre EVEX enabled architectures:
228 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
229 // For EVEX enabled architectures:
230 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
231 //
232 // Linux ABI: No register preserved across function calls
233 // XMM0-XMM7 might hold parameters
234 // Windows ABI: XMM6-XMM15 preserved across function calls
235 // XMM0-XMM3 might hold parameters
236
// XMM0: sixteen reg_def entries, one per VMReg slot of the register.
// The unsuffixed name is bound to xmm0->as_VMReg(); the suffixes b through p
// are bound to ->next(1) through ->next(15) in order. All sixteen entries
// share encoding 0, use Op_RegF as the ideal register type, and are SOC for
// both the register save type and the C convention save type.
// The XMM register groups that follow use the same layout with their own
// register and encoding number.
237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
253
254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
270
271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
287
288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
304
305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
321
322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
338
339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
355
356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
372
373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
389
390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
406
407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
423
424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
440
441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
457
458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
474
475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
491
492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
508
509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
525
526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
542
543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
559
560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
576
// XMM20: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 20, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM20 (SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p(SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
593
// XMM21: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 21, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM21 (SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p(SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
610
// XMM22: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 22, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM22 (SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p(SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
627
// XMM23: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 23, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM23 (SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p(SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
644
// XMM24: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 24, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM24 (SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p(SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
661
// XMM25: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 25, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM25 (SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p(SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
678
// XMM26: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 26, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM26 (SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p(SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
695
// XMM27: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 27, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM27 (SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p(SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
712
// XMM28: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 28, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM28 (SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p(SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
729
// XMM29: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 29, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM29 (SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p(SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
746
// XMM30: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 30, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM30 (SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p(SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
763
// XMM31: base slot plus slots b..p (VMReg next(1)..next(15)). Every slot uses
// encoding 31, save type SOC for both conventions, and ideal type Op_RegF.
reg_def XMM31 (SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p(SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
780
// Flags register: ideal type 0, encoding 16, and VMRegImpl::Bad() as its
// concrete VMReg. Save type is SOC for both conventions.
reg_def RFLAGS (SOC, SOC, 0, 16, VMRegImpl::Bad());
782
// AVX3 Mask Registers K1..K7.
// Each mask register is described by two Op_RegI slots that share one
// encoding: the base VMReg (Kn) and the slot after it (Kn_H, via next()).
reg_def K1  (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H(SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2  (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H(SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3  (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H(SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4  (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H(SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5  (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H(SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6  (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H(SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7  (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H(SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
804
805
806 //----------Architecture Description Register Classes--------------------------
807 // Several register classes are automatically defined based upon information in
808 // this architecture description.
809 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
811 //
812
// Register class with no members.
reg_class no_reg( );
815
// Class for all pointer/long registers including APX extended GPRs.
// Every register is listed as a pair: its base slot and its _H companion.
// RSP and R15 are members of this class.
reg_class all_reg(RAX, RAX_H,  RDX, RDX_H,
                  RBP, RBP_H,  RDI, RDI_H,
                  RSI, RSI_H,  RCX, RCX_H,
                  RBX, RBX_H,  RSP, RSP_H,
                  R8,  R8_H,   R9,  R9_H,
                  R10, R10_H,  R11, R11_H,
                  R12, R12_H,  R13, R13_H,
                  R14, R14_H,  R15, R15_H,
                  R16, R16_H,  R17, R17_H,
                  R18, R18_H,  R19, R19_H,
                  R20, R20_H,  R21, R21_H,
                  R22, R22_H,  R23, R23_H,
                  R24, R24_H,  R25, R25_H,
                  R26, R26_H,  R27, R27_H,
                  R28, R28_H,  R29, R29_H,
                  R30, R30_H,  R31, R31_H);
849
// Class for all int registers including APX extended GPRs.
// Only the base slot of each register is listed (no _H companions).
// RSP and R15 are not members of this class, although all_reg lists both.
// NOTE(review): presumably because they serve as the stack pointer and the
// TLS pointer (see ptr_rsp_reg / ptr_r15_reg) -- confirm before adding them.
reg_class all_int_reg(RAX,
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14,
                      R16,
                      R17,
                      R18,
                      R19,
                      R20,
                      R21,
                      R22,
                      R23,
                      R24,
                      R25,
                      R26,
                      R27,
                      R28,
                      R29,
                      R30,
                      R31);
881
// Pointer register classes whose membership comes from a mask variable
// returned by the embedded code block rather than from an explicit list.

// All pointer registers.
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// All pointer registers except RSP.
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// All pointer registers except RSP and RBP.
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// All pointer registers except RAX and RSP.
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// All pointer registers except RAX, RBX, and RSP.
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}
906
// Long register classes whose membership comes from a mask variable
// returned by the embedded code block rather than from an explicit list.

// All long registers except RSP.
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// All long registers except RAX, RDX and RSP.
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// All long registers except RCX and RSP.
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// All long registers except RBP and R13.
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}
926
// Int register classes whose membership comes from a mask variable
// returned by the embedded code block rather than from an explicit list.

// All int registers except RSP.
reg_class int_reg %{
  return _INT_REG_mask;
%}

// All int registers except RAX, RDX, and RSP.
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// All int registers except RCX and RSP.
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// All int registers except RBP and R13.
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}
946
// Singleton pointer classes: each holds exactly one register, listed as
// its base slot plus its _H companion.

// RAX as a pointer register
reg_class ptr_rax_reg( RAX, RAX_H );

// RBX as a pointer register
reg_class ptr_rbx_reg( RBX, RBX_H );

// RSI as a pointer register
reg_class ptr_rsi_reg( RSI, RSI_H );

// RBP as a pointer register
reg_class ptr_rbp_reg( RBP, RBP_H );

// RDI as a pointer register
reg_class ptr_rdi_reg( RDI, RDI_H );

// RSP, the stack pointer
reg_class ptr_rsp_reg( RSP, RSP_H );

// R15, the TLS pointer
reg_class ptr_r15_reg( R15, R15_H );
967
// Singleton long classes: each holds exactly one register, listed as its
// base slot plus its _H companion.

// RAX as a long register
reg_class long_rax_reg( RAX, RAX_H );

// RCX as a long register
reg_class long_rcx_reg( RCX, RCX_H );

// RDX as a long register
reg_class long_rdx_reg( RDX, RDX_H );

// R11 as a long register
reg_class long_r11_reg( R11, R11_H );
979
// Singleton int classes: each holds only the base slot of one register.

// RAX as an int register
reg_class int_rax_reg( RAX );

// RBX as an int register
reg_class int_rbx_reg( RBX );

// RCX as an int register
reg_class int_rcx_reg( RCX );

// RDX as an int register
reg_class int_rdx_reg( RDX );

// RDI as an int register
reg_class int_rdi_reg( RDI );
994
995 // Singleton class for instruction pointer
996 // reg_class ip_reg(RIP);
997
// Allocation class holding all 16 slots of every XMM register, listed in
// ascending order from XMM0 to XMM31 (one register per row).
alloc_class chunk1(
    XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1030
// Allocation class for the mask registers, listed from K7 down to K1, each
// with its _H companion slot.
alloc_class chunk2(K7, K7_H,  K6, K6_H,  K5, K5_H,  K4, K4_H,
                   K3, K3_H,  K2, K2_H,  K1, K1_H);
1038
// Class containing every mask register K1..K7, each with its _H companion.
reg_class vectmask_reg(K1, K1_H,  K2, K2_H,  K3, K3_H,  K4, K4_H,
                       K5, K5_H,  K6, K6_H,  K7, K7_H);
1046
// Singleton mask classes: one class per mask register K1..K7.
reg_class vectmask_reg_K1( K1, K1_H );
reg_class vectmask_reg_K2( K2, K2_H );
reg_class vectmask_reg_K3( K3, K3_H );
reg_class vectmask_reg_K4( K4, K4_H );
reg_class vectmask_reg_K5( K5, K5_H );
reg_class vectmask_reg_K6( K6, K6_H );
reg_class vectmask_reg_K7( K7, K7_H );
1054
// Allocation class for the flags register. Keep this alloc_class last.
alloc_class chunk3( RFLAGS );
1057
// Condition codes: singleton class holding only RFLAGS.
reg_class int_flags( RFLAGS );
1060
// Class for pre evex float registers: the base slot of XMM0..XMM15.
reg_class float_reg_legacy(XMM0,  XMM1,  XMM2,  XMM3,
                           XMM4,  XMM5,  XMM6,  XMM7,
                           XMM8,  XMM9,  XMM10, XMM11,
                           XMM12, XMM13, XMM14, XMM15);
1078
// Class for evex float registers: the base slot of XMM0..XMM31.
reg_class float_reg_evex(XMM0,  XMM1,  XMM2,  XMM3,
                         XMM4,  XMM5,  XMM6,  XMM7,
                         XMM8,  XMM9,  XMM10, XMM11,
                         XMM12, XMM13, XMM14, XMM15,
                         XMM16, XMM17, XMM18, XMM19,
                         XMM20, XMM21, XMM22, XMM23,
                         XMM24, XMM25, XMM26, XMM27,
                         XMM28, XMM29, XMM30, XMM31);
1112
// Dynamic float classes built from float_reg_evex and float_reg_legacy.
// float_reg is keyed on EVEX support; float_reg_vl additionally requires
// AVX512VL. NOTE(review): presumably the evex class is selected when the
// predicate holds and the legacy class otherwise -- confirm against ADLC.
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy,
                            %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy,
                               %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1115
// Class for pre evex double registers: slots (base, b) of XMM0..XMM15.
reg_class double_reg_legacy(XMM0,  XMM0b,   XMM1,  XMM1b,
                            XMM2,  XMM2b,   XMM3,  XMM3b,
                            XMM4,  XMM4b,   XMM5,  XMM5b,
                            XMM6,  XMM6b,   XMM7,  XMM7b,
                            XMM8,  XMM8b,   XMM9,  XMM9b,
                            XMM10, XMM10b,  XMM11, XMM11b,
                            XMM12, XMM12b,  XMM13, XMM13b,
                            XMM14, XMM14b,  XMM15, XMM15b);
1133
// Class for evex double registers: slots (base, b) of XMM0..XMM31.
reg_class double_reg_evex(XMM0,  XMM0b,   XMM1,  XMM1b,
                          XMM2,  XMM2b,   XMM3,  XMM3b,
                          XMM4,  XMM4b,   XMM5,  XMM5b,
                          XMM6,  XMM6b,   XMM7,  XMM7b,
                          XMM8,  XMM8b,   XMM9,  XMM9b,
                          XMM10, XMM10b,  XMM11, XMM11b,
                          XMM12, XMM12b,  XMM13, XMM13b,
                          XMM14, XMM14b,  XMM15, XMM15b,
                          XMM16, XMM16b,  XMM17, XMM17b,
                          XMM18, XMM18b,  XMM19, XMM19b,
                          XMM20, XMM20b,  XMM21, XMM21b,
                          XMM22, XMM22b,  XMM23, XMM23b,
                          XMM24, XMM24b,  XMM25, XMM25b,
                          XMM26, XMM26b,  XMM27, XMM27b,
                          XMM28, XMM28b,  XMM29, XMM29b,
                          XMM30, XMM30b,  XMM31, XMM31b);
1167
// Dynamic double classes built from double_reg_evex and double_reg_legacy.
// double_reg is keyed on EVEX support; double_reg_vl additionally requires
// AVX512VL. NOTE(review): presumably the evex class is selected when the
// predicate holds and the legacy class otherwise -- confirm against ADLC.
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy,
                             %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy,
                                %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1170
// Class for pre evex 32bit vector registers: the base slot of XMM0..XMM15.
reg_class vectors_reg_legacy(XMM0,  XMM1,  XMM2,  XMM3,
                             XMM4,  XMM5,  XMM6,  XMM7,
                             XMM8,  XMM9,  XMM10, XMM11,
                             XMM12, XMM13, XMM14, XMM15);
1188
// Class for evex 32bit vector registers: the base slot of XMM0..XMM31.
reg_class vectors_reg_evex(XMM0,  XMM1,  XMM2,  XMM3,
                           XMM4,  XMM5,  XMM6,  XMM7,
                           XMM8,  XMM9,  XMM10, XMM11,
                           XMM12, XMM13, XMM14, XMM15,
                           XMM16, XMM17, XMM18, XMM19,
                           XMM20, XMM21, XMM22, XMM23,
                           XMM24, XMM25, XMM26, XMM27,
                           XMM28, XMM29, XMM30, XMM31);
1222
// Dynamic 32bit vector classes built from vectors_reg_evex and
// vectors_reg_legacy. vectors_reg is keyed on EVEX support;
// vectors_reg_vlbwdq is keyed on VM_Version::supports_avx512vlbwdq().
// NOTE(review): presumably the evex class is selected when the predicate
// holds and the legacy class otherwise -- confirm against ADLC.
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1225
// Class for pre evex 64bit vector registers: slots (base, b) of XMM0..XMM15.
reg_class vectord_reg_legacy(XMM0,  XMM0b,   XMM1,  XMM1b,
                             XMM2,  XMM2b,   XMM3,  XMM3b,
                             XMM4,  XMM4b,   XMM5,  XMM5b,
                             XMM6,  XMM6b,   XMM7,  XMM7b,
                             XMM8,  XMM8b,   XMM9,  XMM9b,
                             XMM10, XMM10b,  XMM11, XMM11b,
                             XMM12, XMM12b,  XMM13, XMM13b,
                             XMM14, XMM14b,  XMM15, XMM15b);
1243
// Class for evex 64bit vector registers: slots (base, b) of XMM0..XMM31.
reg_class vectord_reg_evex(XMM0,  XMM0b,   XMM1,  XMM1b,
                           XMM2,  XMM2b,   XMM3,  XMM3b,
                           XMM4,  XMM4b,   XMM5,  XMM5b,
                           XMM6,  XMM6b,   XMM7,  XMM7b,
                           XMM8,  XMM8b,   XMM9,  XMM9b,
                           XMM10, XMM10b,  XMM11, XMM11b,
                           XMM12, XMM12b,  XMM13, XMM13b,
                           XMM14, XMM14b,  XMM15, XMM15b,
                           XMM16, XMM16b,  XMM17, XMM17b,
                           XMM18, XMM18b,  XMM19, XMM19b,
                           XMM20, XMM20b,  XMM21, XMM21b,
                           XMM22, XMM22b,  XMM23, XMM23b,
                           XMM24, XMM24b,  XMM25, XMM25b,
                           XMM26, XMM26b,  XMM27, XMM27b,
                           XMM28, XMM28b,  XMM29, XMM29b,
                           XMM30, XMM30b,  XMM31, XMM31b);
1277
// Dynamic 64bit vector classes built from vectord_reg_evex and
// vectord_reg_legacy. vectord_reg is keyed on EVEX support;
// vectord_reg_vlbwdq is keyed on VM_Version::supports_avx512vlbwdq().
// NOTE(review): presumably the evex class is selected when the predicate
// holds and the legacy class otherwise -- confirm against ADLC.
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1280
// Class for pre evex 128bit vector registers: slots (base, b, c, d) of
// XMM0..XMM15, two registers per row.
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,   XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,   XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,   XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,   XMM7,  XMM7b,  XMM7c,  XMM7d,
                             XMM8,  XMM8b,  XMM8c,  XMM8d,   XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,  XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,  XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,  XMM15, XMM15b, XMM15c, XMM15d);
1298
// Class for evex 128bit vector registers: slots (base, b, c, d) of
// XMM0..XMM31, two registers per row.
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,   XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,   XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,   XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,   XMM7,  XMM7b,  XMM7c,  XMM7d,
                           XMM8,  XMM8b,  XMM8c,  XMM8d,   XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,  XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,  XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,  XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,  XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,  XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,  XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,  XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,  XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,  XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,  XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,  XMM31, XMM31b, XMM31c, XMM31d);
1332
// Dynamic 128bit vector classes built from vectorx_reg_evex and
// vectorx_reg_legacy. vectorx_reg is keyed on EVEX support;
// vectorx_reg_vlbwdq is keyed on VM_Version::supports_avx512vlbwdq().
// NOTE(review): presumably the evex class is selected when the predicate
// holds and the legacy class otherwise -- confirm against ADLC.
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy,
                              %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy,
                                     %{ VM_Version::supports_avx512vlbwdq() %} );
1335
// Class for pre evex 256bit vector registers: slots (base, b..h) of
// XMM0..XMM15, one register per row.
reg_class vectory_reg_legacy(
    XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
1353
// Class for all 256bit vector registers available with EVEX: XMM0-XMM31,
// eight slot names (no suffix, b-h) per register.
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1387
// Dynamic 256bit vector register classes: each one resolves to
// vectory_reg_evex when its predicate holds and to vectory_reg_legacy
// otherwise.
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
1390
// Class for all 512bit vector registers: XMM0-XMM31, sixteen slot names
// (no suffix, b-p) per register.
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                           XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1424
// Class for restricted 512bit vector registers: only XMM0-XMM15, sixteen
// slot names (no suffix, b-p) per register.
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p,
                             XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
1442
// Dynamic 512bit vector register classes: each one resolves to
// vectorz_reg_evex when its predicate holds and to vectorz_reg_legacy
// otherwise. vectorz_reg_vl additionally requires AVX512VL support.
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
1445
// Class containing only XMM0 (slots XMM0, XMM0b, XMM0c, XMM0d).
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
1447
1448 %}
1449
1450
1451 //----------SOURCE BLOCK-------------------------------------------------------
1452 // This is a block of C++ code which provides values, functions, and
1453 // definitions necessary in the rest of the architecture description
1454
source_hpp %{

#include "peephole_x86_64.hpp"

// Predicate helper for CastLL nodes; the definition is in the source block
// that follows this header block.
bool castLL_is_imm32(const Node* n);

%}
1462
1463 source %{
1464
1465 bool castLL_is_imm32(const Node* n) {
1466 assert(n->is_CastLL(), "must be a CastLL");
1467 const TypeLong* t = n->bottom_type()->is_long();
1468 return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
1469 }
1470
1471 %}
1472
1473 // Register masks
source_hpp %{

// Register masks that are filled in at run time by reg_mask_init().
// The matching definitions live in the following source block.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

// Each of these is the corresponding register mask or'ed with
// STACK_OR_STACK_SLOTS_mask() (see reg_mask_init()).
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the stack-or-register masks.
inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }

%}
1500
1501 source %{
// Short names for the Assembler relocation operand kinds.
#define RELOC_IMM64 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

// Shorthand so that "__ insn(...)" expands to "masm->insn(...)"; every
// function using it must have a variable named masm in scope.
#define __ masm->
1506
// Storage for the register masks declared extern in the source_hpp block.
// They start out default-constructed and are populated by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
1524
1525 static bool need_r12_heapbase() {
1526 return UseCompressedOops;
1527 }
1528
1529 void reg_mask_init() {
1530 constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
1531
1532 // _ALL_REG_mask is generated by adlc from the all_reg register class below.
1533 // We derive a number of subsets from it.
1534 _ANY_REG_mask.assignFrom(_ALL_REG_mask);
1535
1536 if (PreserveFramePointer) {
1537 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1538 _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1539 }
1540 if (need_r12_heapbase()) {
1541 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1542 _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
1543 }
1544
1545 _PTR_REG_mask.assignFrom(_ANY_REG_mask);
1546 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
1547 _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
1548 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
1549 _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
1550 if (!UseAPX) {
1551 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1552 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1553 _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
1554 }
1555 }
1556
1557 _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
1558 _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1559
1560 _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
1561 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1562 _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1563
1564 _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
1565 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1566 _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1567
1568 _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
1569 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
1570 _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
1571
1572
1573 _LONG_REG_mask.assignFrom(_PTR_REG_mask);
1574 _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
1575 _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1576
1577 _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
1578 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1579 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
1580 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1581 _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
1582
1583 _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
1584 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1585 _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
1586
1587 _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
1588 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1589 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
1590 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1591 _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
1592
1593 _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
1594 if (!UseAPX) {
1595 for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
1596 _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
1597 }
1598 }
1599
1600 if (PreserveFramePointer) {
1601 _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1602 }
1603 if (need_r12_heapbase()) {
1604 _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
1605 }
1606
1607 _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
1608 _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
1609
1610 _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
1611 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
1612 _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
1613
1614 _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
1615 _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
1616
1617 _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
1618 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
1619 _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
1620
1621 // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
1622 // from the float_reg_legacy/float_reg_evex register class.
1623 _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
1624 }
1625
1626 static bool generate_vzeroupper(Compile* C) {
1627 return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper
1628 }
1629
1630 static int clear_avx_size() {
1631 return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper
1632 }
1633
1634 // !!!!! Special hack to get all types of calls to specify the byte offset
1635 // from the start of the call to the point where the return address
1636 // will point.
1637 int MachCallStaticJavaNode::ret_addr_offset()
1638 {
1639 int offset = 5; // 5 bytes from start of call to where return address points
1640 offset += clear_avx_size();
1641 return offset;
1642 }
1643
1644 int MachCallDynamicJavaNode::ret_addr_offset()
1645 {
1646 int offset = 15; // 15 bytes from start of call to where return address points
1647 offset += clear_avx_size();
1648 return offset;
1649 }
1650
1651 int MachCallRuntimeNode::ret_addr_offset() {
1652 int offset = 13; // movq r10,#addr; callq (r10)
1653 if (this->ideal_Opcode() != Op_CallLeafVector) {
1654 offset += clear_avx_size();
1655 }
1656 return offset;
1657 }
1658 //
1659 // Compute padding required for nodes which need alignment
1660 //
1661
1662 // The address of the call instruction needs to be 4-byte aligned to
1663 // ensure that it does not span a cache line so that it can be patched.
1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
1665 {
1666 current_offset += clear_avx_size(); // skip vzeroupper
1667 current_offset += 1; // skip call opcode byte
1668 return align_up(current_offset, alignment_required()) - current_offset;
1669 }
1670
1671 // The address of the call instruction needs to be 4-byte aligned to
1672 // ensure that it does not span a cache line so that it can be patched.
1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
1674 {
1675 current_offset += clear_avx_size(); // skip vzeroupper
1676 current_offset += 11; // skip movq instruction + call opcode byte
1677 return align_up(current_offset, alignment_required()) - current_offset;
1678 }
1679
1680 // This could be in MacroAssembler but it's fairly C2 specific
1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
1682 Label exit;
1683 __ jccb(Assembler::noParity, exit);
1684 __ pushf();
1685 //
1686 // comiss/ucomiss instructions set ZF,PF,CF flags and
1687 // zero OF,AF,SF for NaN values.
1688 // Fixup flags by zeroing ZF,PF so that compare of NaN
1689 // values returns 'less than' result (CF is set).
1690 // Leave the rest of flags unchanged.
1691 //
1692 // 7 6 5 4 3 2 1 0
1693 // |S|Z|r|A|r|P|r|C| (r - reserved bit)
1694 // 0 0 1 0 1 0 1 1 (0x2B)
1695 //
1696 __ andq(Address(rsp, 0), 0xffffff2b);
1697 __ popf();
1698 __ bind(exit);
1699 }
1700
1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
1702 // If any floating point comparison instruction is used, unordered case always triggers jump
1703 // for below condition, CF=1 is true when at least one input is NaN
1704 Label done;
1705 __ movl(dst, -1);
1706 __ jcc(Assembler::below, done);
1707 __ setcc(Assembler::notEqual, dst);
1708 __ bind(done);
1709 }
1710
// Floating point precision selector used by emit_fp_ucom(), movfp() and
// emit_fp_min_max() to pick the matching instruction variant.
enum FP_PREC {
  fp_prec_hlf,  // half precision (evucomish / movhlf)
  fp_prec_flt,  // single precision (ucomiss / movflt)
  fp_prec_dbl   // double precision (ucomisd / movdbl)
};
1716
1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
1718 XMMRegister p, XMMRegister q) {
1719 if (pt == fp_prec_hlf) {
1720 __ evucomish(p, q);
1721 } else if (pt == fp_prec_flt) {
1722 __ ucomiss(p, q);
1723 } else {
1724 __ ucomisd(p, q);
1725 }
1726 }
1727
1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
1729 XMMRegister dst, XMMRegister src, Register scratch) {
1730 if (pt == fp_prec_hlf) {
1731 __ movhlf(dst, src, scratch);
1732 } else if (pt == fp_prec_flt) {
1733 __ movflt(dst, src);
1734 } else {
1735 __ movdbl(dst, src);
1736 }
1737 }
1738
1739 // Math.min() # Math.max()
1740 // -----------------------------
1741 // (v)ucomis[h/s/d] #
1742 // ja -> b # a
1743 // jp -> NaN # NaN
1744 // jb -> a # b
1745 // je -> a | b # a & b
1746 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
1747 XMMRegister a, XMMRegister b, Register rt,
1748 bool min, enum FP_PREC pt) {
1749 Label nan, zero, below, above, done;
1750
1751 emit_fp_ucom(masm, pt, a, b);
1752
1753 if (dst->encoding() != (min ? b : a)->encoding()) {
1754 __ jccb(Assembler::above, above); // CF=0 & ZF=0
1755 } else {
1756 __ jccb(Assembler::above, done);
1757 }
1758 __ jccb(Assembler::parity, nan); // PF=1
1759 __ jccb(Assembler::below, below); // CF=1
1760
1761 // equal
1762 // Using bitwise operations is a low cost way to compute the correct result
1763 // for zero and non-zero inputs in this scenario except for NaN, which is
1764 // handled separately. The mantissa and exponent are valid with either
1765 // bitwise operation. For zero inputs, the sign bit is chosen according to
1766 // whether a minimum or maximum value is required.
1767 if (min) {
1768 // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
1769 __ vpor(dst, a, b, Assembler::AVX_128bit);
1770 } else {
1771 // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
1772 __ vpand(dst, a, b, Assembler::AVX_128bit);
1773 }
1774 __ jmp(done);
1775
1776 __ bind(above);
1777 movfp(masm, pt, dst, min ? b : a, rt);
1778 __ jmp(done);
1779
1780 __ bind(nan);
1781 if (pt == fp_prec_hlf) {
1782 __ movl(rt, 0x00007e00); // Float16.NaN
1783 __ evmovw(dst, rt);
1784 } else if (pt == fp_prec_flt) {
1785 __ movl(rt, 0x7fc00000); // Float.NaN
1786 __ movdl(dst, rt);
1787 } else {
1788 __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
1789 __ movdq(dst, rt);
1790 }
1791 __ jmp(done);
1792
1793 __ bind(below);
1794 movfp(masm, pt, dst, min ? a : b, rt);
1795
1796 __ bind(done);
1797 }
1798
1799 //=============================================================================
// The output register mask of MachConstantBaseNode is the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
1801
1802 int ConstantTable::calculate_table_base_offset() const {
1803 return 0; // absolute addressing, no offset
1804 }
1805
1806 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
1807 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
1808 ShouldNotReachHere();
1809 }
1810
1811 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
1812 // Empty encoding
1813 }
1814
1815 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
1816 return 0;
1817 }
1818
1819 #ifndef PRODUCT
1820 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1821 st->print("# MachConstantBaseNode (empty encoding)");
1822 }
1823 #endif
1824
1825
1826 //=============================================================================
1827 #ifndef PRODUCT
1828 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
1829 Compile* C = ra_->C;
1830
1831 int framesize = C->output()->frame_size_in_bytes();
1832 int bangsize = C->output()->bang_size_in_bytes();
1833 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1834 // Remove wordSize for return addr which is already pushed.
1835 framesize -= wordSize;
1836
1837 if (C->output()->need_stack_bang(bangsize)) {
1838 framesize -= wordSize;
1839 st->print("# stack bang (%d bytes)", bangsize);
1840 st->print("\n\t");
1841 st->print("pushq rbp\t# Save rbp");
1842 if (PreserveFramePointer) {
1843 st->print("\n\t");
1844 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1845 }
1846 if (framesize) {
1847 st->print("\n\t");
1848 st->print("subq rsp, #%d\t# Create frame",framesize);
1849 }
1850 } else {
1851 st->print("subq rsp, #%d\t# Create frame",framesize);
1852 st->print("\n\t");
1853 framesize -= wordSize;
1854 st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize);
1855 if (PreserveFramePointer) {
1856 st->print("\n\t");
1857 st->print("movq rbp, rsp\t# Save the caller's SP into rbp");
1858 if (framesize > 0) {
1859 st->print("\n\t");
1860 st->print("addq rbp, #%d", framesize);
1861 }
1862 }
1863 }
1864
1865 if (VerifyStackAtCalls) {
1866 st->print("\n\t");
1867 framesize -= wordSize;
1868 st->print("movq [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
1869 #ifdef ASSERT
1870 st->print("\n\t");
1871 st->print("# stack alignment check");
1872 #endif
1873 }
1874 if (C->stub_function() != nullptr) {
1875 st->print("\n\t");
1876 st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
1877 st->print("\n\t");
1878 st->print("je fast_entry\t");
1879 st->print("\n\t");
1880 st->print("call #nmethod_entry_barrier_stub\t");
1881 st->print("\n\tfast_entry:");
1882 }
1883 st->cr();
1884 }
1885 #endif
1886
1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
1888 Compile* C = ra_->C;
1889
1890 int framesize = C->output()->frame_size_in_bytes();
1891 int bangsize = C->output()->bang_size_in_bytes();
1892
1893 if (C->clinit_barrier_on_entry()) {
1894 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1895 assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
1896
1897 Label L_skip_barrier;
1898 Register klass = rscratch1;
1899
1900 __ mov_metadata(klass, C->method()->holder()->constant_encoding());
1901 __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
1902
1903 __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
1904
1905 __ bind(L_skip_barrier);
1906 }
1907
1908 __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
1909
1910 C->output()->set_frame_complete(__ offset());
1911
1912 if (C->has_mach_constant_base_node()) {
1913 // NOTE: We set the table base offset here because users might be
1914 // emitted before MachConstantBaseNode.
1915 ConstantTable& constant_table = C->output()->constant_table();
1916 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
1917 }
1918 }
1919
1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
1921 {
1922 return MachNode::size(ra_); // too many variables; just compute it
1923 // the hard way
1924 }
1925
1926 int MachPrologNode::reloc() const
1927 {
1928 return 0; // a large enough number
1929 }
1930
1931 //=============================================================================
1932 #ifndef PRODUCT
1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
1934 {
1935 Compile* C = ra_->C;
1936 if (generate_vzeroupper(C)) {
1937 st->print("vzeroupper");
1938 st->cr(); st->print("\t");
1939 }
1940
1941 int framesize = C->output()->frame_size_in_bytes();
1942 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1943 // Remove word for return adr already pushed
1944 // and RBP
1945 framesize -= 2*wordSize;
1946
1947 if (framesize) {
1948 st->print_cr("addq rsp, %d\t# Destroy frame", framesize);
1949 st->print("\t");
1950 }
1951
1952 st->print_cr("popq rbp");
1953 if (do_polling() && C->is_method_compilation()) {
1954 st->print("\t");
1955 st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
1956 "ja #safepoint_stub\t"
1957 "# Safepoint: poll for GC");
1958 }
1959 }
1960 #endif
1961
1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
1963 {
1964 Compile* C = ra_->C;
1965
1966 if (generate_vzeroupper(C)) {
1967 // Clear upper bits of YMM registers when current compiled code uses
1968 // wide vectors to avoid AVX <-> SSE transition penalty during call.
1969 __ vzeroupper();
1970 }
1971
1972 int framesize = C->output()->frame_size_in_bytes();
1973 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
1974 // Remove word for return adr already pushed
1975 // and RBP
1976 framesize -= 2*wordSize;
1977
1978 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
1979
1980 if (framesize) {
1981 __ addq(rsp, framesize);
1982 }
1983
1984 __ popq(rbp);
1985
1986 if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
1987 __ reserved_stack_check();
1988 }
1989
1990 if (do_polling() && C->is_method_compilation()) {
1991 Label dummy_label;
1992 Label* code_stub = &dummy_label;
1993 if (!C->output()->in_scratch_emit_size()) {
1994 C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
1995 C->output()->add_stub(stub);
1996 code_stub = &stub->entry();
1997 }
1998 __ relocate(relocInfo::poll_return_type);
1999 __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
2000 }
2001 }
2002
2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
2004 {
2005 return MachNode::size(ra_); // too many variables; just compute it
2006 // the hard way
2007 }
2008
2009 int MachEpilogNode::reloc() const
2010 {
2011 return 2; // a large enough number
2012 }
2013
2014 const Pipeline* MachEpilogNode::pipeline() const
2015 {
2016 return MachNode::pipeline_class();
2017 }
2018
2019 //=============================================================================
2020
// Coarse register category of an OptoReg, as computed by rc_class().
enum RC {
  rc_bad,    // not a valid OptoReg
  rc_int,    // general purpose register (VMReg::is_Register())
  rc_kreg,   // K register (VMReg::is_KRegister())
  rc_float,  // XMM register (VMReg::is_XMMRegister())
  rc_stack   // stack slot
};
2028
2029 static enum RC rc_class(OptoReg::Name reg)
2030 {
2031 if( !OptoReg::is_valid(reg) ) return rc_bad;
2032
2033 if (OptoReg::is_stack(reg)) return rc_stack;
2034
2035 VMReg r = OptoReg::as_VMReg(reg);
2036
2037 if (r->is_Register()) return rc_int;
2038
2039 if (r->is_KRegister()) return rc_kreg;
2040
2041 assert(r->is_XMMRegister(), "must be");
2042 return rc_float;
2043 }
2044
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.

// Vector register-to-register move helper; called from
// MachSpillCopyNode::implementation() for XMM -> XMM copies.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector spill helper between an XMM register and a stack slot; is_load
// selects the direction.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);
2051
2052 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
2053 int dst_offset, uint ireg, outputStream* st) {
2054 if (masm) {
2055 switch (ireg) {
2056 case Op_VecS:
2057 __ movq(Address(rsp, -8), rax);
2058 __ movl(rax, Address(rsp, src_offset));
2059 __ movl(Address(rsp, dst_offset), rax);
2060 __ movq(rax, Address(rsp, -8));
2061 break;
2062 case Op_VecD:
2063 __ pushq(Address(rsp, src_offset));
2064 __ popq (Address(rsp, dst_offset));
2065 break;
2066 case Op_VecX:
2067 __ pushq(Address(rsp, src_offset));
2068 __ popq (Address(rsp, dst_offset));
2069 __ pushq(Address(rsp, src_offset+8));
2070 __ popq (Address(rsp, dst_offset+8));
2071 break;
2072 case Op_VecY:
2073 __ vmovdqu(Address(rsp, -32), xmm0);
2074 __ vmovdqu(xmm0, Address(rsp, src_offset));
2075 __ vmovdqu(Address(rsp, dst_offset), xmm0);
2076 __ vmovdqu(xmm0, Address(rsp, -32));
2077 break;
2078 case Op_VecZ:
2079 __ evmovdquq(Address(rsp, -64), xmm0, 2);
2080 __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
2081 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
2082 __ evmovdquq(xmm0, Address(rsp, -64), 2);
2083 break;
2084 default:
2085 ShouldNotReachHere();
2086 }
2087 #ifndef PRODUCT
2088 } else {
2089 switch (ireg) {
2090 case Op_VecS:
2091 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2092 "movl rax, [rsp + #%d]\n\t"
2093 "movl [rsp + #%d], rax\n\t"
2094 "movq rax, [rsp - #8]",
2095 src_offset, dst_offset);
2096 break;
2097 case Op_VecD:
2098 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2099 "popq [rsp + #%d]",
2100 src_offset, dst_offset);
2101 break;
2102 case Op_VecX:
2103 st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
2104 "popq [rsp + #%d]\n\t"
2105 "pushq [rsp + #%d]\n\t"
2106 "popq [rsp + #%d]",
2107 src_offset, dst_offset, src_offset+8, dst_offset+8);
2108 break;
2109 case Op_VecY:
2110 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
2111 "vmovdqu xmm0, [rsp + #%d]\n\t"
2112 "vmovdqu [rsp + #%d], xmm0\n\t"
2113 "vmovdqu xmm0, [rsp - #32]",
2114 src_offset, dst_offset);
2115 break;
2116 case Op_VecZ:
2117 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
2118 "vmovdqu xmm0, [rsp + #%d]\n\t"
2119 "vmovdqu [rsp + #%d], xmm0\n\t"
2120 "vmovdqu xmm0, [rsp - #64]",
2121 src_offset, dst_offset);
2122 break;
2123 default:
2124 ShouldNotReachHere();
2125 }
2126 #endif
2127 }
2128 }
2129
2130 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
2131 PhaseRegAlloc* ra_,
2132 bool do_size,
2133 outputStream* st) const {
2134 assert(masm != nullptr || st != nullptr, "sanity");
2135 // Get registers to move
2136 OptoReg::Name src_second = ra_->get_reg_second(in(1));
2137 OptoReg::Name src_first = ra_->get_reg_first(in(1));
2138 OptoReg::Name dst_second = ra_->get_reg_second(this);
2139 OptoReg::Name dst_first = ra_->get_reg_first(this);
2140
2141 enum RC src_second_rc = rc_class(src_second);
2142 enum RC src_first_rc = rc_class(src_first);
2143 enum RC dst_second_rc = rc_class(dst_second);
2144 enum RC dst_first_rc = rc_class(dst_first);
2145
2146 assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
2147 "must move at least 1 register" );
2148
2149 if (src_first == dst_first && src_second == dst_second) {
2150 // Self copy, no move
2151 return 0;
2152 }
2153 if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
2154 uint ireg = ideal_reg();
2155 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
2156 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
2157 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
2158 // mem -> mem
2159 int src_offset = ra_->reg2offset(src_first);
2160 int dst_offset = ra_->reg2offset(dst_first);
2161 vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
2162 } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
2163 vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
2164 } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
2165 int stack_offset = ra_->reg2offset(dst_first);
2166 vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
2167 } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
2168 int stack_offset = ra_->reg2offset(src_first);
2169 vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st);
2170 } else {
2171 ShouldNotReachHere();
2172 }
2173 return 0;
2174 }
2175 if (src_first_rc == rc_stack) {
2176 // mem ->
2177 if (dst_first_rc == rc_stack) {
2178 // mem -> mem
2179 assert(src_second != dst_first, "overlap");
2180 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2181 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2182 // 64-bit
2183 int src_offset = ra_->reg2offset(src_first);
2184 int dst_offset = ra_->reg2offset(dst_first);
2185 if (masm) {
2186 __ pushq(Address(rsp, src_offset));
2187 __ popq (Address(rsp, dst_offset));
2188 #ifndef PRODUCT
2189 } else {
2190 st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
2191 "popq [rsp + #%d]",
2192 src_offset, dst_offset);
2193 #endif
2194 }
2195 } else {
2196 // 32-bit
2197 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2198 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2199 // No pushl/popl, so:
2200 int src_offset = ra_->reg2offset(src_first);
2201 int dst_offset = ra_->reg2offset(dst_first);
2202 if (masm) {
2203 __ movq(Address(rsp, -8), rax);
2204 __ movl(rax, Address(rsp, src_offset));
2205 __ movl(Address(rsp, dst_offset), rax);
2206 __ movq(rax, Address(rsp, -8));
2207 #ifndef PRODUCT
2208 } else {
2209 st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
2210 "movl rax, [rsp + #%d]\n\t"
2211 "movl [rsp + #%d], rax\n\t"
2212 "movq rax, [rsp - #8]",
2213 src_offset, dst_offset);
2214 #endif
2215 }
2216 }
2217 return 0;
2218 } else if (dst_first_rc == rc_int) {
2219 // mem -> gpr
2220 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2221 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2222 // 64-bit
2223 int offset = ra_->reg2offset(src_first);
2224 if (masm) {
2225 __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2226 #ifndef PRODUCT
2227 } else {
2228 st->print("movq %s, [rsp + #%d]\t# spill",
2229 Matcher::regName[dst_first],
2230 offset);
2231 #endif
2232 }
2233 } else {
2234 // 32-bit
2235 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2236 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2237 int offset = ra_->reg2offset(src_first);
2238 if (masm) {
2239 __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2240 #ifndef PRODUCT
2241 } else {
2242 st->print("movl %s, [rsp + #%d]\t# spill",
2243 Matcher::regName[dst_first],
2244 offset);
2245 #endif
2246 }
2247 }
2248 return 0;
2249 } else if (dst_first_rc == rc_float) {
2250 // mem-> xmm
2251 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2252 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2253 // 64-bit
2254 int offset = ra_->reg2offset(src_first);
2255 if (masm) {
2256 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2257 #ifndef PRODUCT
2258 } else {
2259 st->print("%s %s, [rsp + #%d]\t# spill",
2260 UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
2261 Matcher::regName[dst_first],
2262 offset);
2263 #endif
2264 }
2265 } else {
2266 // 32-bit
2267 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2268 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2269 int offset = ra_->reg2offset(src_first);
2270 if (masm) {
2271 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2272 #ifndef PRODUCT
2273 } else {
2274 st->print("movss %s, [rsp + #%d]\t# spill",
2275 Matcher::regName[dst_first],
2276 offset);
2277 #endif
2278 }
2279 }
2280 return 0;
2281 } else if (dst_first_rc == rc_kreg) {
2282 // mem -> kreg
2283 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2284 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2285 // 64-bit
2286 int offset = ra_->reg2offset(src_first);
2287 if (masm) {
2288 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
2289 #ifndef PRODUCT
2290 } else {
2291 st->print("kmovq %s, [rsp + #%d]\t# spill",
2292 Matcher::regName[dst_first],
2293 offset);
2294 #endif
2295 }
2296 }
2297 return 0;
2298 }
2299 } else if (src_first_rc == rc_int) {
2300 // gpr ->
2301 if (dst_first_rc == rc_stack) {
2302 // gpr -> mem
2303 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2304 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2305 // 64-bit
2306 int offset = ra_->reg2offset(dst_first);
2307 if (masm) {
2308 __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2309 #ifndef PRODUCT
2310 } else {
2311 st->print("movq [rsp + #%d], %s\t# spill",
2312 offset,
2313 Matcher::regName[src_first]);
2314 #endif
2315 }
2316 } else {
2317 // 32-bit
2318 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2319 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2320 int offset = ra_->reg2offset(dst_first);
2321 if (masm) {
2322 __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
2323 #ifndef PRODUCT
2324 } else {
2325 st->print("movl [rsp + #%d], %s\t# spill",
2326 offset,
2327 Matcher::regName[src_first]);
2328 #endif
2329 }
2330 }
2331 return 0;
2332 } else if (dst_first_rc == rc_int) {
2333 // gpr -> gpr
2334 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2335 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2336 // 64-bit
2337 if (masm) {
2338 __ movq(as_Register(Matcher::_regEncode[dst_first]),
2339 as_Register(Matcher::_regEncode[src_first]));
2340 #ifndef PRODUCT
2341 } else {
2342 st->print("movq %s, %s\t# spill",
2343 Matcher::regName[dst_first],
2344 Matcher::regName[src_first]);
2345 #endif
2346 }
2347 return 0;
2348 } else {
2349 // 32-bit
2350 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2351 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2352 if (masm) {
2353 __ movl(as_Register(Matcher::_regEncode[dst_first]),
2354 as_Register(Matcher::_regEncode[src_first]));
2355 #ifndef PRODUCT
2356 } else {
2357 st->print("movl %s, %s\t# spill",
2358 Matcher::regName[dst_first],
2359 Matcher::regName[src_first]);
2360 #endif
2361 }
2362 return 0;
2363 }
2364 } else if (dst_first_rc == rc_float) {
2365 // gpr -> xmm
2366 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2367 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2368 // 64-bit
2369 if (masm) {
2370 __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2371 #ifndef PRODUCT
2372 } else {
2373 st->print("movdq %s, %s\t# spill",
2374 Matcher::regName[dst_first],
2375 Matcher::regName[src_first]);
2376 #endif
2377 }
2378 } else {
2379 // 32-bit
2380 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2381 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2382 if (masm) {
2383 __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2384 #ifndef PRODUCT
2385 } else {
2386 st->print("movdl %s, %s\t# spill",
2387 Matcher::regName[dst_first],
2388 Matcher::regName[src_first]);
2389 #endif
2390 }
2391 }
2392 return 0;
2393 } else if (dst_first_rc == rc_kreg) {
2394 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2395 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2396 // 64-bit
2397 if (masm) {
2398 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
2399 #ifndef PRODUCT
2400 } else {
2401 st->print("kmovq %s, %s\t# spill",
2402 Matcher::regName[dst_first],
2403 Matcher::regName[src_first]);
2404 #endif
2405 }
2406 }
2407 Unimplemented();
2408 return 0;
2409 }
2410 } else if (src_first_rc == rc_float) {
2411 // xmm ->
2412 if (dst_first_rc == rc_stack) {
2413 // xmm -> mem
2414 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2415 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2416 // 64-bit
2417 int offset = ra_->reg2offset(dst_first);
2418 if (masm) {
2419 __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2420 #ifndef PRODUCT
2421 } else {
2422 st->print("movsd [rsp + #%d], %s\t# spill",
2423 offset,
2424 Matcher::regName[src_first]);
2425 #endif
2426 }
2427 } else {
2428 // 32-bit
2429 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2430 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2431 int offset = ra_->reg2offset(dst_first);
2432 if (masm) {
2433 __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
2434 #ifndef PRODUCT
2435 } else {
2436 st->print("movss [rsp + #%d], %s\t# spill",
2437 offset,
2438 Matcher::regName[src_first]);
2439 #endif
2440 }
2441 }
2442 return 0;
2443 } else if (dst_first_rc == rc_int) {
2444 // xmm -> gpr
2445 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2446 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2447 // 64-bit
2448 if (masm) {
2449 __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2450 #ifndef PRODUCT
2451 } else {
2452 st->print("movdq %s, %s\t# spill",
2453 Matcher::regName[dst_first],
2454 Matcher::regName[src_first]);
2455 #endif
2456 }
2457 } else {
2458 // 32-bit
2459 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2460 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2461 if (masm) {
2462 __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2463 #ifndef PRODUCT
2464 } else {
2465 st->print("movdl %s, %s\t# spill",
2466 Matcher::regName[dst_first],
2467 Matcher::regName[src_first]);
2468 #endif
2469 }
2470 }
2471 return 0;
2472 } else if (dst_first_rc == rc_float) {
2473 // xmm -> xmm
2474 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2475 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2476 // 64-bit
2477 if (masm) {
2478 __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2479 #ifndef PRODUCT
2480 } else {
2481 st->print("%s %s, %s\t# spill",
2482 UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
2483 Matcher::regName[dst_first],
2484 Matcher::regName[src_first]);
2485 #endif
2486 }
2487 } else {
2488 // 32-bit
2489 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
2490 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
2491 if (masm) {
2492 __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
2493 #ifndef PRODUCT
2494 } else {
2495 st->print("%s %s, %s\t# spill",
2496 UseXmmRegToRegMoveAll ? "movaps" : "movss ",
2497 Matcher::regName[dst_first],
2498 Matcher::regName[src_first]);
2499 #endif
2500 }
2501 }
2502 return 0;
2503 } else if (dst_first_rc == rc_kreg) {
2504 assert(false, "Illegal spilling");
2505 return 0;
2506 }
2507 } else if (src_first_rc == rc_kreg) {
2508 if (dst_first_rc == rc_stack) {
2509 // mem -> kreg
2510 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2511 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2512 // 64-bit
2513 int offset = ra_->reg2offset(dst_first);
2514 if (masm) {
2515 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
2516 #ifndef PRODUCT
2517 } else {
2518 st->print("kmovq [rsp + #%d] , %s\t# spill",
2519 offset,
2520 Matcher::regName[src_first]);
2521 #endif
2522 }
2523 }
2524 return 0;
2525 } else if (dst_first_rc == rc_int) {
2526 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2527 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2528 // 64-bit
2529 if (masm) {
2530 __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2531 #ifndef PRODUCT
2532 } else {
2533 st->print("kmovq %s, %s\t# spill",
2534 Matcher::regName[dst_first],
2535 Matcher::regName[src_first]);
2536 #endif
2537 }
2538 }
2539 Unimplemented();
2540 return 0;
2541 } else if (dst_first_rc == rc_kreg) {
2542 if ((src_first & 1) == 0 && src_first + 1 == src_second &&
2543 (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
2544 // 64-bit
2545 if (masm) {
2546 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
2547 #ifndef PRODUCT
2548 } else {
2549 st->print("kmovq %s, %s\t# spill",
2550 Matcher::regName[dst_first],
2551 Matcher::regName[src_first]);
2552 #endif
2553 }
2554 }
2555 return 0;
2556 } else if (dst_first_rc == rc_float) {
2557 assert(false, "Illegal spill");
2558 return 0;
2559 }
2560 }
2561
2562 assert(0," foo ");
2563 Unimplemented();
2564 return 0;
2565 }
2566
2567 #ifndef PRODUCT
2568 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
2569 implementation(nullptr, ra_, false, st);
2570 }
2571 #endif
2572
2573 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
2574 implementation(masm, ra_, false, nullptr);
2575 }
2576
2577 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
2578 return MachNode::size(ra_);
2579 }
2580
2581 //=============================================================================
2582 #ifndef PRODUCT
2583 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2584 {
2585 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2586 int reg = ra_->get_reg_first(this);
2587 st->print("leaq %s, [rsp + #%d]\t# box lock",
2588 Matcher::regName[reg], offset);
2589 }
2590 #endif
2591
2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2593 {
2594 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2595 int reg = ra_->get_encode(this);
2596
2597 __ lea(as_Register(reg), Address(rsp, offset));
2598 }
2599
2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
2601 {
2602 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
2603 if (ra_->get_encode(this) > 15) {
2604 return (offset < 0x80) ? 6 : 9; // REX2
2605 } else {
2606 return (offset < 0x80) ? 5 : 8; // REX
2607 }
2608 }
2609
2610 //=============================================================================
2611 #ifndef PRODUCT
2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
2613 {
2614 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
2615 st->print_cr("\tcmpl rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
2616 st->print_cr("\tjne SharedRuntime::_ic_miss_stub");
2617 }
2618 #endif
2619
2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
2621 {
2622 __ ic_check(InteriorEntryAlignment);
2623 }
2624
2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
2626 {
2627 return MachNode::size(ra_); // too many variables; just compute it
2628 // the hard way
2629 }
2630
2631
2632 //=============================================================================
2633
2634 bool Matcher::supports_vector_calling_convention(void) {
2635 return EnableVectorSupport;
2636 }
2637
2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
2639 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
2640 }
2641
2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
2643 return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
2644 }
2645
#ifdef ASSERT
// Debug-only: true when mdef is flagged as demotable through either its
// first or its second operand.
static bool is_ndd_demotable(const MachNode* mdef) {
  if (is_ndd_demotable_opr1(mdef)) {
    return true;
  }
  return is_ndd_demotable_opr2(mdef);
}
#endif
2651
2652 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
2653 int oper_index) {
2654 if (mdef == nullptr) {
2655 return false;
2656 }
2657
2658 if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
2659 mdef->in(mdef->operand_index(oper_index)) == nullptr) {
2660 assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
2661 assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
2662 return false;
2663 }
2664
2665 // Complex memory operand covers multiple incoming edges needed for
2666 // address computation. Biasing def towards any address component will not
2667 // result in NDD demotion by assembler.
2668 if (mdef->operand_num_edges(oper_index) != 1) {
2669 return false;
2670 }
2671
2672 // Demotion candidate must be register mask compatible with definition.
2673 const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
2674 if (!oper_mask.overlap(mdef->out_RegMask())) {
2675 assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
2676 return false;
2677 }
2678
2679 switch (oper_index) {
2680 // First operand of MachNode corresponding to Intel APX NDD selection
2681 // pattern can share its assigned register with definition operand if
2682 // their live ranges do not overlap. In such a scenario we can demote
2683 // it to legacy map0/map1 instruction by replacing its 4-byte extended
2684 // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
2685 // are decorated with a special flag by instruction selector.
2686 case 1:
2687 return is_ndd_demotable_opr1(mdef);
2688
2689 // Definition operand of commutative operation can be biased towards second
2690 // operand.
2691 case 2:
2692 return is_ndd_demotable_opr2(mdef);
2693
2694 // Current scheme only selects up to two biasing candidates
2695 default:
2696 assert(false, "unhandled operand index: %s", mdef->Name());
2697 break;
2698 }
2699
2700 return false;
2701 }
2702
2703 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
2704 assert(EnableVectorSupport, "sanity");
2705 int lo = XMM0_num;
2706 int hi = XMM0b_num;
2707 if (ideal_reg == Op_VecX) hi = XMM0d_num;
2708 else if (ideal_reg == Op_VecY) hi = XMM0h_num;
2709 else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
2710 return OptoRegPair(hi, lo);
2711 }
2712
2713 // Is this branch offset short enough that a short branch can be used?
2714 //
2715 // NOTE: If the platform does not provide any short branch variants, then
2716 // this method should return false for offset 0.
2717 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2718 // The passed offset is relative to address of the branch.
2719 // On 86 a branch displacement is calculated relative to address
2720 // of a next instruction.
2721 offset -= br_size;
2722
2723 // the short version of jmpConUCF2 contains multiple branches,
2724 // making the reach slightly less
2725 if (rule == jmpConUCF2_rule)
2726 return (-126 <= offset && offset <= 125);
2727 return (-128 <= offset && offset <= 127);
2728 }
2729
#ifdef ASSERT
// Return whether or not this register is ever used as an argument.
bool Matcher::can_be_java_arg(int reg)
{
  // Both halves of every listed general purpose and XMM register.
  static const int java_arg_regs[] = {
    RDI_num,  RDI_H_num,
    RSI_num,  RSI_H_num,
    RDX_num,  RDX_H_num,
    RCX_num,  RCX_H_num,
    R8_num,   R8_H_num,
    R9_num,   R9_H_num,
    R12_num,  R12_H_num,
    XMM0_num, XMM0b_num,
    XMM1_num, XMM1b_num,
    XMM2_num, XMM2b_num,
    XMM3_num, XMM3b_num,
    XMM4_num, XMM4b_num,
    XMM5_num, XMM5b_num,
    XMM6_num, XMM6b_num,
    XMM7_num, XMM7b_num
  };
  for (const int candidate : java_arg_regs) {
    if (reg == candidate) {
      return true;
    }
  }
  return false;
}
#endif
2752
2753 uint Matcher::int_pressure_limit()
2754 {
2755 return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
2756 }
2757
2758 uint Matcher::float_pressure_limit()
2759 {
2760 // After experiment around with different values, the following default threshold
2761 // works best for LCM's register pressure scheduling on x64.
2762 uint dec_count = VM_Version::supports_evex() ? 4 : 2;
2763 uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
2764 return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
2765 }
2766
2767 // Register for DIVI projection of divmodI
2768 const RegMask& Matcher::divI_proj_mask() {
2769 return INT_RAX_REG_mask();
2770 }
2771
2772 // Register for MODI projection of divmodI
2773 const RegMask& Matcher::modI_proj_mask() {
2774 return INT_RDX_REG_mask();
2775 }
2776
2777 // Register for DIVL projection of divmodL
2778 const RegMask& Matcher::divL_proj_mask() {
2779 return LONG_RAX_REG_mask();
2780 }
2781
2782 // Register for MODL projection of divmodL
2783 const RegMask& Matcher::modL_proj_mask() {
2784 return LONG_RDX_REG_mask();
2785 }
2786
2787 %}
2788
2789 source_hpp %{
2790 // Header information of the source block.
2791 // Method declarations/definitions which are used outside
2792 // the ad-scope can conveniently be defined here.
2793 //
2794 // To keep related declarations/definitions/uses close together,
2795 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
2796
2797 #include "runtime/vm_version.hpp"
2798
2799 class NativeJump;
2800
2801 class CallStubImpl {
2802
2803 //--------------------------------------------------------------
2804 //---< Used for optimization in Compile::shorten_branches >---
2805 //--------------------------------------------------------------
2806
2807 public:
2808 // Size of call trampoline stub.
2809 static uint size_call_trampoline() {
2810 return 0; // no call trampolines on this platform
2811 }
2812
2813 // number of relocations needed by a call trampoline stub
2814 static uint reloc_call_trampoline() {
2815 return 0; // no call trampolines on this platform
2816 }
2817 };
2818
2819 class HandlerImpl {
2820
2821 public:
2822
2823 static int emit_deopt_handler(C2_MacroAssembler* masm);
2824
2825 static uint size_deopt_handler() {
2826 // one call and one jmp.
2827 return 7;
2828 }
2829 };
2830
2831 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
2832 switch(bytes) {
2833 case 4: // fall-through
2834 case 8: // fall-through
2835 case 16: return Assembler::AVX_128bit;
2836 case 32: return Assembler::AVX_256bit;
2837 case 64: return Assembler::AVX_512bit;
2838
2839 default: {
2840 ShouldNotReachHere();
2841 return Assembler::AVX_NoVec;
2842 }
2843 }
2844 }
2845
2846 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
2847 return vector_length_encoding(Matcher::vector_length_in_bytes(n));
2848 }
2849
2850 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
2851 uint def_idx = use->operand_index(opnd);
2852 Node* def = use->in(def_idx);
2853 return vector_length_encoding(def);
2854 }
2855
2856 static inline bool is_vector_popcount_predicate(BasicType bt) {
2857 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
2858 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
2859 }
2860
2861 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
2862 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
2863 (VM_Version::supports_avx512vl() || vlen_bytes == 64);
2864 }
2865
2866 class Node::PD {
2867 public:
2868 enum NodeFlags : uint64_t {
2869 Flag_intel_jcc_erratum = Node::_last_flag << 1,
2870 Flag_sets_carry_flag = Node::_last_flag << 2,
2871 Flag_sets_parity_flag = Node::_last_flag << 3,
2872 Flag_sets_zero_flag = Node::_last_flag << 4,
2873 Flag_sets_overflow_flag = Node::_last_flag << 5,
2874 Flag_sets_sign_flag = Node::_last_flag << 6,
2875 Flag_clears_carry_flag = Node::_last_flag << 7,
2876 Flag_clears_parity_flag = Node::_last_flag << 8,
2877 Flag_clears_zero_flag = Node::_last_flag << 9,
2878 Flag_clears_overflow_flag = Node::_last_flag << 10,
2879 Flag_clears_sign_flag = Node::_last_flag << 11,
2880 Flag_ndd_demotable_opr1 = Node::_last_flag << 12,
2881 Flag_ndd_demotable_opr2 = Node::_last_flag << 13,
2882 _last_flag = Flag_ndd_demotable_opr2
2883 };
2884 };
2885
2886 %} // end source_hpp
2887
2888 source %{
2889
2890 #include "opto/addnode.hpp"
2891 #include "c2_intelJccErratum_x86.hpp"
2892
2893 void PhaseOutput::pd_perform_mach_node_analysis() {
2894 if (VM_Version::has_intel_jcc_erratum()) {
2895 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
2896 _buf_sizes._code += extra_padding;
2897 }
2898 }
2899
2900 int MachNode::pd_alignment_required() const {
2901 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
2902 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
2903 return IntelJccErratum::largest_jcc_size() + 1;
2904 } else {
2905 return 1;
2906 }
2907 }
2908
2909 int MachNode::compute_padding(int current_offset) const {
2910 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
2911 Compile* C = Compile::current();
2912 PhaseOutput* output = C->output();
2913 Block* block = output->block();
2914 int index = output->index();
2915 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
2916 } else {
2917 return 0;
2918 }
2919 }
2920
2921 // Emit deopt handler code.
2922 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
2923
2924 // Note that the code buffer's insts_mark is always relative to insts.
2925 // That's why we must use the macroassembler to generate a handler.
2926 address base = __ start_a_stub(size_deopt_handler());
2927 if (base == nullptr) {
2928 ciEnv::current()->record_failure("CodeCache is full");
2929 return 0; // CodeBuffer::expand failed
2930 }
2931 int offset = __ offset();
2932
2933 Label start;
2934 __ bind(start);
2935
2936 __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
2937
2938 int entry_offset = __ offset();
2939
2940 __ jmp(start);
2941
2942 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
2943 assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
2944 "out of bounds read in post-call NOP check");
2945 __ end_a_stub();
2946 return entry_offset;
2947 }
2948
2949 static Assembler::Width widthForType(BasicType bt) {
2950 if (bt == T_BYTE) {
2951 return Assembler::B;
2952 } else if (bt == T_SHORT) {
2953 return Assembler::W;
2954 } else if (bt == T_INT) {
2955 return Assembler::D;
2956 } else {
2957 assert(bt == T_LONG, "not a long: %s", type2name(bt));
2958 return Assembler::Q;
2959 }
2960 }
2961
2962 //=============================================================================
2963
2964 // Float masks come from different places depending on platform.
2965 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
2966 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
2967 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
2968 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
2969 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
2970 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
2971 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
2972 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
2973 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
2974 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
2975 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
2976 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
2977 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
2978 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
2979 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
2980 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
2981 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
2982 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
2983 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
2984
2985 //=============================================================================
2986 bool Matcher::match_rule_supported(int opcode) {
2987 if (!has_match_rule(opcode)) {
2988 return false; // no match rule present
2989 }
2990 switch (opcode) {
2991 case Op_AbsVL:
2992 case Op_StoreVectorScatter:
2993 if (UseAVX < 3) {
2994 return false;
2995 }
2996 break;
2997 case Op_PopCountI:
2998 case Op_PopCountL:
2999 if (!UsePopCountInstruction) {
3000 return false;
3001 }
3002 break;
3003 case Op_PopCountVI:
3004 if (UseAVX < 2) {
3005 return false;
3006 }
3007 break;
3008 case Op_CompressV:
3009 case Op_ExpandV:
3010 case Op_PopCountVL:
3011 if (UseAVX < 2) {
3012 return false;
3013 }
3014 break;
3015 case Op_MulVI:
3016 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
3017 return false;
3018 }
3019 break;
3020 case Op_MulVL:
3021 if (UseSSE < 4) { // only with SSE4_1 or AVX
3022 return false;
3023 }
3024 break;
3025 case Op_MulReductionVL:
3026 if (VM_Version::supports_avx512dq() == false) {
3027 return false;
3028 }
3029 break;
3030 case Op_AbsVB:
3031 case Op_AbsVS:
3032 case Op_AbsVI:
3033 case Op_AddReductionVI:
3034 case Op_AndReductionV:
3035 case Op_OrReductionV:
3036 case Op_XorReductionV:
3037 if (UseSSE < 3) { // requires at least SSSE3
3038 return false;
3039 }
3040 break;
3041 case Op_MaxHF:
3042 case Op_MinHF:
3043 if (!VM_Version::supports_avx512vlbw()) {
3044 return false;
3045 } // fallthrough
3046 case Op_AddHF:
3047 case Op_DivHF:
3048 case Op_FmaHF:
3049 case Op_MulHF:
3050 case Op_ReinterpretS2HF:
3051 case Op_ReinterpretHF2S:
3052 case Op_SubHF:
3053 case Op_SqrtHF:
3054 if (!VM_Version::supports_avx512_fp16()) {
3055 return false;
3056 }
3057 break;
3058 case Op_VectorLoadShuffle:
3059 case Op_VectorRearrange:
3060 case Op_MulReductionVI:
3061 if (UseSSE < 4) { // requires at least SSE4
3062 return false;
3063 }
3064 break;
3065 case Op_IsInfiniteF:
3066 case Op_IsInfiniteD:
3067 if (!VM_Version::supports_avx512dq()) {
3068 return false;
3069 }
3070 break;
3071 case Op_SqrtVD:
3072 case Op_SqrtVF:
3073 case Op_VectorMaskCmp:
3074 case Op_VectorCastB2X:
3075 case Op_VectorCastS2X:
3076 case Op_VectorCastI2X:
3077 case Op_VectorCastL2X:
3078 case Op_VectorCastF2X:
3079 case Op_VectorCastD2X:
3080 case Op_VectorUCastB2X:
3081 case Op_VectorUCastS2X:
3082 case Op_VectorUCastI2X:
3083 case Op_VectorMaskCast:
3084 if (UseAVX < 1) { // enabled for AVX only
3085 return false;
3086 }
3087 break;
3088 case Op_PopulateIndex:
3089 if (UseAVX < 2) {
3090 return false;
3091 }
3092 break;
3093 case Op_RoundVF:
3094 if (UseAVX < 2) { // enabled for AVX2 only
3095 return false;
3096 }
3097 break;
3098 case Op_RoundVD:
3099 if (UseAVX < 3) {
3100 return false; // enabled for AVX3 only
3101 }
3102 break;
3103 case Op_CompareAndSwapL:
3104 case Op_CompareAndSwapP:
3105 break;
3106 case Op_StrIndexOf:
3107 if (!UseSSE42Intrinsics) {
3108 return false;
3109 }
3110 break;
3111 case Op_StrIndexOfChar:
3112 if (!UseSSE42Intrinsics) {
3113 return false;
3114 }
3115 break;
3116 case Op_OnSpinWait:
3117 if (VM_Version::supports_on_spin_wait() == false) {
3118 return false;
3119 }
3120 break;
3121 case Op_MulVB:
3122 case Op_LShiftVB:
3123 case Op_RShiftVB:
3124 case Op_URShiftVB:
3125 case Op_VectorInsert:
3126 case Op_VectorLoadMask:
3127 case Op_VectorStoreMask:
3128 case Op_VectorBlend:
3129 if (UseSSE < 4) {
3130 return false;
3131 }
3132 break;
3133 case Op_MaxD:
3134 case Op_MaxF:
3135 case Op_MinD:
3136 case Op_MinF:
3137 if (UseAVX < 1) { // enabled for AVX only
3138 return false;
3139 }
3140 break;
3141 case Op_CacheWB:
3142 case Op_CacheWBPreSync:
3143 case Op_CacheWBPostSync:
3144 if (!VM_Version::supports_data_cache_line_flush()) {
3145 return false;
3146 }
3147 break;
3148 case Op_ExtractB:
3149 case Op_ExtractL:
3150 case Op_ExtractI:
3151 case Op_RoundDoubleMode:
3152 if (UseSSE < 4) {
3153 return false;
3154 }
3155 break;
3156 case Op_RoundDoubleModeV:
3157 if (VM_Version::supports_avx() == false) {
3158 return false; // 128bit vroundpd is not available
3159 }
3160 break;
3161 case Op_LoadVectorGather:
3162 case Op_LoadVectorGatherMasked:
3163 if (UseAVX < 2) {
3164 return false;
3165 }
3166 break;
3167 case Op_FmaF:
3168 case Op_FmaD:
3169 case Op_FmaVD:
3170 case Op_FmaVF:
3171 if (!UseFMA) {
3172 return false;
3173 }
3174 break;
3175 case Op_MacroLogicV:
3176 if (UseAVX < 3 || !UseVectorMacroLogic) {
3177 return false;
3178 }
3179 break;
3180
3181 case Op_VectorCmpMasked:
3182 case Op_VectorMaskGen:
3183 if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
3184 return false;
3185 }
3186 break;
3187 case Op_VectorMaskFirstTrue:
3188 case Op_VectorMaskLastTrue:
3189 case Op_VectorMaskTrueCount:
3190 case Op_VectorMaskToLong:
3191 if (UseAVX < 1) {
3192 return false;
3193 }
3194 break;
3195 case Op_RoundF:
3196 case Op_RoundD:
3197 break;
3198 case Op_CopySignD:
3199 case Op_CopySignF:
3200 if (UseAVX < 3) {
3201 return false;
3202 }
3203 if (!VM_Version::supports_avx512vl()) {
3204 return false;
3205 }
3206 break;
3207 case Op_CompressBits:
3208 case Op_ExpandBits:
3209 if (!VM_Version::supports_bmi2()) {
3210 return false;
3211 }
3212 break;
3213 case Op_CompressM:
3214 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
3215 return false;
3216 }
3217 break;
3218 case Op_ConvF2HF:
3219 case Op_ConvHF2F:
3220 if (!VM_Version::supports_float16()) {
3221 return false;
3222 }
3223 break;
3224 case Op_VectorCastF2HF:
3225 case Op_VectorCastHF2F:
3226 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
3227 return false;
3228 }
3229 break;
3230 }
3231 return true; // Match rules are supported by default.
3232 }
3233
3234 //------------------------------------------------------------------------
3235
3236 static inline bool is_pop_count_instr_target(BasicType bt) {
3237 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
3238 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
3239 }
3240
3241 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
3242 return match_rule_supported_vector(opcode, vlen, bt);
3243 }
3244
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
//
// Checks are applied in this order:
//   1. match_rule_supported(opcode)   - opcode-only feature/flag checks;
//   2. vector_size_supported(bt, vlen) - vector size limits for the element type;
//   3. the per-opcode switch below    - restrictions that depend on vlen and/or bt.
// Returns true only if none of the checks rejects the combination. Opcodes that
// are not listed in the switch are accepted once steps 1 and 2 pass.
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
  //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  //   * 128bit vroundpd instruction is present only in AVX1
  // Total vector width in bits for vlen elements of type bt.
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_MaxVHF:
    case Op_MinVHF:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      // fallthrough: no break above, so the checks shared with the other
      // half-float vector operations below are applied as well.
    case Op_AddVHF:
    case Op_DivVHF:
    case Op_FmaVHF:
    case Op_MulVHF:
    case Op_SubVHF:
    case Op_SqrtVHF:
      // Vectors narrower than 512 bits need AVX512VL.
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
      // Only INT and LONG element types are accepted for vector rotates.
      if (bt != T_INT && bt != T_LONG) {
        return false;
      } // fallthrough
    case Op_MacroLogicV:
      // Needs EVEX; vectors other than 512 bits additionally need AVX512VL.
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
      if (!VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      // Without AVX512BW, sub-word element types are rejected and at least
      // AVX1 is required for the remaining types.
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_UMinV:
    case Op_UMaxV:
      if (UseAVX == 0) {
        return false;
      }
      break;
    case Op_UMinReductionV:
    case Op_UMaxReductionV:
      if (UseAVX == 0) {
        return false;
      }
      // LONG elements require AVX512VL.
      if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // With UseAVX > 2, 512-bit vectors are rejected when AVX512VL is missing.
      if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
        // Float/Double intrinsics are enabled for AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      // Sub-word reductions need SSE4.
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
      break;
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      // Note: this 512-bit AVX512DQ check is applied for every element type,
      // not only FLOAT/DOUBLE.
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
      break;
    case Op_VectorBlend:
      if (UseAVX == 0 && size_in_bits < 128) {
        return false;
      }
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if(vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
    case Op_VectorMaskCast:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_PopulateIndex:
      if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
      // 256-bit results other than DOUBLE need AVX2.
      if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      // Integral targets: 256-bit results need AVX2.
      // Non-integral targets: AVX512DQ is required.
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X: {
        // As per JLS section 5.1.3 narrowing conversion to sub-word types
        // happen after intermediate conversion to integer and special handling
        // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
        // The width is computed from the FLOAT source type, not from bt.
        int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
        if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
          return false;
        }
      }
      // fallthrough
    case Op_VectorCastD2X:
      // Conversion to LONG requires AVX512DQ.
      if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      // Accepted with F16C; otherwise EVEX is required, plus AVX512VL for
      // vectors other than 512 bits.
      if (!VM_Version::supports_f16c() &&
         ((!VM_Version::supports_evex() ||
         ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
        return false;
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
      // Non-sub-word types: vectors narrower than 512 bits need AVX512VL.
      if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // Sub-word types: rejected above 256 bits without AVX512BW, below
      // 64 bits always, and for SHORT when BMI2 is missing.
      if (is_subword_type(bt) &&
         ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
          (size_in_bits < 64)                                      ||
          (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
        return false;
      }
      break;
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      // Scatter is never accepted for sub-word element types.
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (!is_subword_type(bt) && size_in_bits == 64) {
        return false;
      }
      if (is_subword_type(bt) && size_in_bits < 64) {
        return false;
      }
      break;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (UseAVX < 1) {
        return false; // Implementation limitation
      }
      if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_SelectFromTwoVector:
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      // More than 16 lanes, or sub-word lanes, need AVX512BW.
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (size_in_bits < 128 ) {
        return false;
      }
      // fallthrough: there is no break here, so the Op_VectorLongToMask
      // checks below are also applied to CompressV/ExpandV.
      // NOTE(review): the fall-through carries no marker in the original;
      // confirm it is intentional.
    case Op_VectorLongToMask:
      if (UseAVX < 1) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SignumVD:
    case Op_SignumVF:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
        // When is_pop_count_instr_target(bt) is false, 512-bit vectors are
        // only accepted with AVX512BW.
        if (!is_pop_count_instr_target(bt) &&
            (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
          return false;
        }
      }
      break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true;  // Per default match rules are supported.
}
3592
// Decide whether a masked (predicated) form of the given vector opcode is
// supported for vlen elements of type bt.
//
// Unlike match_rule_supported_vector(), the default answer here is false:
// only opcodes explicitly listed in the switch can return true.
bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // ADLC based match_rule_supported routine checks for the existence of pattern based
  // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
  // of their non-masked counterpart with mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation patterns
  // by returning a default false value for all the other opcodes apart from the
  // ones whose masked instruction patterns are defined in this file.
  if (!match_rule_supported_vector(opcode, vlen, bt)) {
    return false;
  }

  // Every masked operation on a vector other than 512 bits needs AVX512VL.
  // The asserts further down restate this invariant.
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
    return false;
  }
  switch(opcode) {
    // Unary masked operations
    case Op_AbsVB:
    case Op_AbsVS:
      if(!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      // fallthrough: with AVX512BW present, byte/short abs is accepted like
      // the int/long variants below.
    case Op_AbsVI:
    case Op_AbsVL:
      return true;

    // Ternary masked operations
    case Op_FmaVF:
    case Op_FmaVD:
      return true;

    case Op_MacroLogicV:
      // Only INT and LONG element types are accepted.
      if(bt != T_INT && bt != T_LONG) {
        return false;
      }
      return true;

    // Binary masked operations
    case Op_AddVB:
    case Op_AddVS:
    case Op_SubVB:
    case Op_SubVS:
    case Op_MulVS:
    case Op_LShiftVS:
    case Op_RShiftVS:
    case Op_URShiftVS:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_MulVL:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512dq()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_AndV:
    case Op_OrV:
    case Op_XorV:
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorLoadMask:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      return true;

    // Accepted without further checks once the common checks above pass.
    case Op_AddVI:
    case Op_AddVL:
    case Op_AddVF:
    case Op_AddVD:
    case Op_SubVI:
    case Op_SubVL:
    case Op_SubVF:
    case Op_SubVD:
    case Op_MulVI:
    case Op_MulVF:
    case Op_MulVD:
    case Op_DivVF:
    case Op_DivVD:
    case Op_SqrtVF:
    case Op_SqrtVD:
    case Op_LShiftVI:
    case Op_LShiftVL:
    case Op_RShiftVI:
    case Op_RShiftVL:
    case Op_URShiftVI:
    case Op_URShiftVL:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
      return true;

    case Op_UMinV:
    case Op_UMaxV:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      } // fallthrough
    case Op_MaxV:
    case Op_MinV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
        return false; // Implementation limitation
      }
      return true;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      // Masked saturating add/sub is accepted only for sub-word types.
      if (!is_subword_type(bt)) {
        return false;
      }
      if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorMaskCmp:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorRearrange:
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
      return true;

    // Binary Logical operations
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      // Masks with more than 16 lanes need AVX512BW.
      if (vlen > 16 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt)) {
        return false;
      }
      return true;

    case Op_MaskAll:
      return true;

    case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
      // fallthrough: any other type, or a CPU without AVX512CD, ends up in
      // the default rejection below.
    default:
      return false;
  }
}
3764
// Always returns false on this platform; both arguments are ignored.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}
3768
3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
3771 switch (elem_bt) {
3772 case T_BYTE: return false;
3773 case T_SHORT: return !VM_Version::supports_avx512bw();
3774 case T_INT: return !VM_Version::supports_avx();
3775 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl();
3776 default:
3777 ShouldNotReachHere();
3778 return false;
3779 }
3780 }
3781
3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
3783 // Prefer predicate if the mask type is "TypePVectMask".
3784 return vt->isa_pvectmask() != nullptr;
3785 }
3786
3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
3788 assert(Matcher::is_generic_vector(generic_opnd), "not generic");
3789 bool legacy = (generic_opnd->opcode() == LEGVEC);
3790 if (!VM_Version::supports_avx512vlbwdq() && // KNL
3791 is_temp && !legacy && (ideal_reg == Op_VecZ)) {
3792 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
3793 return new legVecZOper();
3794 }
3795 if (legacy) {
3796 switch (ideal_reg) {
3797 case Op_VecS: return new legVecSOper();
3798 case Op_VecD: return new legVecDOper();
3799 case Op_VecX: return new legVecXOper();
3800 case Op_VecY: return new legVecYOper();
3801 case Op_VecZ: return new legVecZOper();
3802 }
3803 } else {
3804 switch (ideal_reg) {
3805 case Op_VecS: return new vecSOper();
3806 case Op_VecD: return new vecDOper();
3807 case Op_VecX: return new vecXOper();
3808 case Op_VecY: return new vecYOper();
3809 case Op_VecZ: return new vecZOper();
3810 }
3811 }
3812 ShouldNotReachHere();
3813 return nullptr;
3814 }
3815
3816 bool Matcher::is_reg2reg_move(MachNode* m) {
3817 switch (m->rule()) {
3818 case MoveVec2Leg_rule:
3819 case MoveLeg2Vec_rule:
3820 case MoveF2VL_rule:
3821 case MoveF2LEG_rule:
3822 case MoveVL2F_rule:
3823 case MoveLEG2F_rule:
3824 case MoveD2VL_rule:
3825 case MoveD2LEG_rule:
3826 case MoveVL2D_rule:
3827 case MoveLEG2D_rule:
3828 return true;
3829 default:
3830 return false;
3831 }
3832 }
3833
3834 bool Matcher::is_generic_vector(MachOper* opnd) {
3835 switch (opnd->opcode()) {
3836 case VEC:
3837 case LEGVEC:
3838 return true;
3839 default:
3840 return false;
3841 }
3842 }
3843
3844 //------------------------------------------------------------------------
3845
// Returns the address of the _VECTMASK_REG_mask register mask.
const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}
3849
3850 // Max vector size in bytes. 0 if not supported.
3851 int Matcher::vector_width_in_bytes(BasicType bt) {
3852 assert(is_java_primitive(bt), "only primitive type vectors");
3853 // SSE2 supports 128bit vectors for all types.
3854 // AVX2 supports 256bit vectors for all types.
3855 // AVX2/EVEX supports 512bit vectors for all types.
3856 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
3857 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
3858 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
3859 size = (UseAVX > 2) ? 64 : 32;
3860 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
3861 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
3862 // Use flag to limit vector size.
3863 size = MIN2(size,(int)MaxVectorSize);
3864 // Minimum 2 values in vector (or 4 for bytes).
3865 switch (bt) {
3866 case T_DOUBLE:
3867 case T_LONG:
3868 if (size < 16) return 0;
3869 break;
3870 case T_FLOAT:
3871 case T_INT:
3872 if (size < 8) return 0;
3873 break;
3874 case T_BOOLEAN:
3875 if (size < 4) return 0;
3876 break;
3877 case T_CHAR:
3878 if (size < 4) return 0;
3879 break;
3880 case T_BYTE:
3881 if (size < 4) return 0;
3882 break;
3883 case T_SHORT:
3884 if (size < 4) return 0;
3885 break;
3886 default:
3887 ShouldNotReachHere();
3888 }
3889 return size;
3890 }
3891
3892 // Limits on vector size (number of elements) loaded into vector.
3893 int Matcher::max_vector_size(const BasicType bt) {
3894 return vector_width_in_bytes(bt)/type2aelembytes(bt);
3895 }
3896 int Matcher::min_vector_size(const BasicType bt) {
3897 int max_size = max_vector_size(bt);
3898 // Min size which can be loaded into vector is 4 bytes.
3899 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
3900 // Support for calling svml double64 vectors
3901 if (bt == T_DOUBLE) {
3902 size = 1;
3903 }
3904 return MIN2(size,max_size);
3905 }
3906
3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
3908 // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
3909 // by default on Cascade Lake
3910 if (VM_Version::is_default_intel_cascade_lake()) {
3911 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
3912 }
3913 return Matcher::max_vector_size(bt);
3914 }
3915
// Always returns -1, regardless of bt.
// NOTE(review): presumably -1 signals that this platform has no scalable
// vector registers - confirm against the shared Matcher code.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
3919
3920 // Vector ideal reg corresponding to specified size in bytes
3921 uint Matcher::vector_ideal_reg(int size) {
3922 assert(MaxVectorSize >= size, "");
3923 switch(size) {
3924 case 4: return Op_VecS;
3925 case 8: return Op_VecD;
3926 case 16: return Op_VecX;
3927 case 32: return Op_VecY;
3928 case 64: return Op_VecZ;
3929 }
3930 ShouldNotReachHere();
3931 return 0;
3932 }
3933
3934 // Check for shift by small constant as well
3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
3936 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
3937 shift->in(2)->get_int() <= 3 &&
3938 // Are there other uses besides address expressions?
3939 !matcher->is_visited(shift)) {
3940 address_visited.set(shift->_idx); // Flag as address_visited
3941 mstack.push(shift->in(2), Matcher::Visit);
3942 Node *conv = shift->in(1);
3943 // Allow Matcher to match the rule which bypass
3944 // ConvI2L operation for an array index on LP64
3945 // if the index value is positive.
3946 if (conv->Opcode() == Op_ConvI2L &&
3947 conv->as_Type()->type()->is_long()->_lo >= 0 &&
3948 // Are there other uses besides address expressions?
3949 !matcher->is_visited(conv)) {
3950 address_visited.set(conv->_idx); // Flag as address_visited
3951 mstack.push(conv->in(1), Matcher::Pre_Visit);
3952 } else {
3953 mstack.push(conv, Matcher::Pre_Visit);
3954 }
3955 return true;
3956 }
3957 return false;
3958 }
3959
3960 // This function identifies sub-graphs in which a 'load' node is
3961 // input to two different nodes, and such that it can be matched
3962 // with BMI instructions like blsi, blsr, etc.
3963 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
3965 // refers to the same node.
3966 //
3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
3968 // This is a temporary solution until we make DAGs expressible in ADL.
3969 template<typename ConType>
3970 class FusedPatternMatcher {
3971 Node* _op1_node;
3972 Node* _mop_node;
3973 int _con_op;
3974
3975 static int match_next(Node* n, int next_op, int next_op_idx) {
3976 if (n->in(1) == nullptr || n->in(2) == nullptr) {
3977 return -1;
3978 }
3979
3980 if (next_op_idx == -1) { // n is commutative, try rotations
3981 if (n->in(1)->Opcode() == next_op) {
3982 return 1;
3983 } else if (n->in(2)->Opcode() == next_op) {
3984 return 2;
3985 }
3986 } else {
3987 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
3988 if (n->in(next_op_idx)->Opcode() == next_op) {
3989 return next_op_idx;
3990 }
3991 }
3992 return -1;
3993 }
3994
3995 public:
3996 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
3997 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
3998
3999 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
4000 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
4001 typename ConType::NativeType con_value) {
4002 if (_op1_node->Opcode() != op1) {
4003 return false;
4004 }
4005 if (_mop_node->outcnt() > 2) {
4006 return false;
4007 }
4008 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
4009 if (op1_op2_idx == -1) {
4010 return false;
4011 }
4012 // Memory operation must be the other edge
4013 int op1_mop_idx = (op1_op2_idx & 1) + 1;
4014
4015 // Check that the mop node is really what we want
4016 if (_op1_node->in(op1_mop_idx) == _mop_node) {
4017 Node* op2_node = _op1_node->in(op1_op2_idx);
4018 if (op2_node->outcnt() > 1) {
4019 return false;
4020 }
4021 assert(op2_node->Opcode() == op2, "Should be");
4022 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
4023 if (op2_con_idx == -1) {
4024 return false;
4025 }
4026 // Memory operation must be the other edge
4027 int op2_mop_idx = (op2_con_idx & 1) + 1;
4028 // Check that the memory operation is the same node
4029 if (op2_node->in(op2_mop_idx) == _mop_node) {
4030 // Now check the constant
4031 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
4032 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
4033 return true;
4034 }
4035 }
4036 }
4037 return false;
4038 }
4039 };
4040
4041 static bool is_bmi_pattern(Node* n, Node* m) {
4042 assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
4043 if (n != nullptr && m != nullptr) {
4044 if (m->Opcode() == Op_LoadI) {
4045 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
4046 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
4047 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
4048 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
4049 } else if (m->Opcode() == Op_LoadL) {
4050 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
4051 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
4052 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
4053 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
4054 }
4055 }
4056 return false;
4057 }
4058
4059 // Should the matcher clone input 'm' of node 'n'?
4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
4061 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
4062 if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
4063 mstack.push(m, Visit);
4064 return true;
4065 }
4066 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
4067 mstack.push(m, Visit); // m = ShiftCntV
4068 return true;
4069 }
4070 if (is_encode_and_store_pattern(n, m)) {
4071 mstack.push(m, Visit);
4072 return true;
4073 }
4074 return false;
4075 }
4076
// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
//
// Handles two shapes of AddP 'm':
//   * the offset is a constant: m is flagged as address_visited, its inputs
//     are pushed on mstack, and true is returned;
//   * the offset is a shift accepted by clone_shift(): m is flagged and its
//     address and base are pushed, and true is returned.
// In every other case nothing is pushed and false is returned.
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        !adr->in(AddPNode::Offset)->is_Con() &&
        off->get_long() == (int) (off->get_long()) && // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      // If the inner offset is not a clonable shift, push it as an ordinary
      // input to be visited.
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      // The inner address is not folded; push it as a whole.
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    // Non-constant offset that clone_shift() accepted; it has already pushed
    // the shift's inputs.
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
4118
4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
4120 switch (bt) {
4121 case BoolTest::eq:
4122 return Assembler::eq;
4123 case BoolTest::ne:
4124 return Assembler::neq;
4125 case BoolTest::le:
4126 case BoolTest::ule:
4127 return Assembler::le;
4128 case BoolTest::ge:
4129 case BoolTest::uge:
4130 return Assembler::nlt;
4131 case BoolTest::lt:
4132 case BoolTest::ult:
4133 return Assembler::lt;
4134 case BoolTest::gt:
4135 case BoolTest::ugt:
4136 return Assembler::nle;
4137 default : ShouldNotReachHere(); return Assembler::_false;
4138 }
4139 }
4140
4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
4142 switch (bt) {
4143 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
4144 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
4145 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
4146 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
4147 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
4148 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
4149 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
4150 default: ShouldNotReachHere(); return Assembler::FALSE_OS;
4151 }
4152 }
4153
4154 // Helper methods for MachSpillCopyNode::implementation().
4155 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
4156 int src_hi, int dst_hi, uint ireg, outputStream* st) {
4157 assert(ireg == Op_VecS || // 32bit vector
4158 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
4159 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
4160 "no non-adjacent vector moves" );
4161 if (masm) {
4162 switch (ireg) {
4163 case Op_VecS: // copy whole register
4164 case Op_VecD:
4165 case Op_VecX:
4166 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4167 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4168 } else {
4169 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4170 }
4171 break;
4172 case Op_VecY:
4173 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4174 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
4175 } else {
4176 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
4177 }
4178 break;
4179 case Op_VecZ:
4180 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
4181 break;
4182 default:
4183 ShouldNotReachHere();
4184 }
4185 #ifndef PRODUCT
4186 } else {
4187 switch (ireg) {
4188 case Op_VecS:
4189 case Op_VecD:
4190 case Op_VecX:
4191 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4192 break;
4193 case Op_VecY:
4194 case Op_VecZ:
4195 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
4196 break;
4197 default:
4198 ShouldNotReachHere();
4199 }
4200 #endif
4201 }
4202 }
4203
4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
4205 int stack_offset, int reg, uint ireg, outputStream* st) {
4206 if (masm) {
4207 if (is_load) {
4208 switch (ireg) {
4209 case Op_VecS:
4210 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4211 break;
4212 case Op_VecD:
4213 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4214 break;
4215 case Op_VecX:
4216 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4217 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4218 } else {
4219 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4220 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4221 }
4222 break;
4223 case Op_VecY:
4224 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4225 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
4226 } else {
4227 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4228 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
4229 }
4230 break;
4231 case Op_VecZ:
4232 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
4233 break;
4234 default:
4235 ShouldNotReachHere();
4236 }
4237 } else { // store
4238 switch (ireg) {
4239 case Op_VecS:
4240 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4241 break;
4242 case Op_VecD:
4243 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4244 break;
4245 case Op_VecX:
4246 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4247 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4248 }
4249 else {
4250 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4251 }
4252 break;
4253 case Op_VecY:
4254 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
4255 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
4256 }
4257 else {
4258 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
4259 }
4260 break;
4261 case Op_VecZ:
4262 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
4263 break;
4264 default:
4265 ShouldNotReachHere();
4266 }
4267 }
4268 #ifndef PRODUCT
4269 } else {
4270 if (is_load) {
4271 switch (ireg) {
4272 case Op_VecS:
4273 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4274 break;
4275 case Op_VecD:
4276 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4277 break;
4278 case Op_VecX:
4279 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4280 break;
4281 case Op_VecY:
4282 case Op_VecZ:
4283 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
4284 break;
4285 default:
4286 ShouldNotReachHere();
4287 }
4288 } else { // store
4289 switch (ireg) {
4290 case Op_VecS:
4291 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4292 break;
4293 case Op_VecD:
4294 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4295 break;
4296 case Op_VecX:
4297 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4298 break;
4299 case Op_VecY:
4300 case Op_VecZ:
4301 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
4302 break;
4303 default:
4304 ShouldNotReachHere();
4305 }
4306 }
4307 #endif
4308 }
4309 }
4310
4311 template <class T>
4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
4313 int size = type2aelembytes(bt) * len;
4314 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
4315 for (int i = 0; i < len; i++) {
4316 int offset = i * type2aelembytes(bt);
4317 switch (bt) {
4318 case T_BYTE: val->at(i) = con; break;
4319 case T_SHORT: {
4320 jshort c = con;
4321 memcpy(val->adr_at(offset), &c, sizeof(jshort));
4322 break;
4323 }
4324 case T_INT: {
4325 jint c = con;
4326 memcpy(val->adr_at(offset), &c, sizeof(jint));
4327 break;
4328 }
4329 case T_LONG: {
4330 jlong c = con;
4331 memcpy(val->adr_at(offset), &c, sizeof(jlong));
4332 break;
4333 }
4334 case T_FLOAT: {
4335 jfloat c = con;
4336 memcpy(val->adr_at(offset), &c, sizeof(jfloat));
4337 break;
4338 }
4339 case T_DOUBLE: {
4340 jdouble c = con;
4341 memcpy(val->adr_at(offset), &c, sizeof(jdouble));
4342 break;
4343 }
4344 default: assert(false, "%s", type2name(bt));
4345 }
4346 }
4347 return val;
4348 }
4349
4350 static inline jlong high_bit_set(BasicType bt) {
4351 switch (bt) {
4352 case T_BYTE: return 0x8080808080808080;
4353 case T_SHORT: return 0x8000800080008000;
4354 case T_INT: return 0x8000000080000000;
4355 case T_LONG: return 0x8000000000000000;
4356 default:
4357 ShouldNotReachHere();
4358 return 0;
4359 }
4360 }
4361
4362 #ifndef PRODUCT
4363 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
4364 st->print("nop \t# %d bytes pad for loops and calls", _count);
4365 }
4366 #endif
4367
4368 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
4369 __ nop(_count);
4370 }
4371
4372 uint MachNopNode::size(PhaseRegAlloc*) const {
4373 return _count;
4374 }
4375
4376 #ifndef PRODUCT
4377 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
4378 st->print("# breakpoint");
4379 }
4380 #endif
4381
4382 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
4383 __ int3();
4384 }
4385
4386 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
4387 return MachNode::size(ra_);
4388 }
4389
4390 %}
4391
4392 //----------ENCODING BLOCK-----------------------------------------------------
4393 // This block specifies the encoding classes used by the compiler to
4394 // output byte streams. Encoding classes are parameterized macros
4395 // used by Machine Instruction Nodes in order to generate the bit
4396 // encoding of the instruction. Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.
// REG_INTER causes an operand to generate a function
4400 // which returns its register number when queried. CONST_INTER causes
4401 // an operand to generate a function which returns the value of the
4402 // constant when queried. MEMORY_INTER causes an operand to generate
4403 // four functions which return the Base Register, the Index Register,
4404 // the Scale Value, and the Offset Value of the operand when queried.
4405 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
4407 // associated with each basic boolean condition for a conditional
4408 // instruction.
4409 //
4410 // Instructions specify two basic values for encoding. Again, a
4411 // function is available to check if the constant displacement is an
4412 // oop. They use the ins_encode keyword to specify their encoding
4413 // classes (which must be a sequence of enc_class names, and their
4414 // parameters, specified in the encoding block), and they use the
4415 // opcode keyword to specify, in order, their primary, secondary, and
4416 // tertiary opcode. Only the opcode sections which a particular
4417 // instruction needs for encoding need to be specified.
4418 encode %{
4419 enc_class cdql_enc(no_rax_rdx_RegI div)
4420 %{
4421 // Full implementation of Java idiv and irem; checks for
4422 // special case as described in JVM spec., p.243 & p.271.
4423 //
4424 // normal case special case
4425 //
4426 // input : rax: dividend min_int
4427 // reg: divisor -1
4428 //
4429 // output: rax: quotient (= rax idiv reg) min_int
4430 // rdx: remainder (= rax irem reg) 0
4431 //
4432 // Code sequnce:
4433 //
4434 // 0: 3d 00 00 00 80 cmp $0x80000000,%eax
4435 // 5: 75 07/08 jne e <normal>
4436 // 7: 33 d2 xor %edx,%edx
4437 // [div >= 8 -> offset + 1]
4438 // [REX_B]
4439 // 9: 83 f9 ff cmp $0xffffffffffffffff,$div
4440 // c: 74 03/04 je 11 <done>
4441 // 000000000000000e <normal>:
4442 // e: 99 cltd
4443 // [div >= 8 -> offset + 1]
4444 // [REX_B]
4445 // f: f7 f9 idiv $div
4446 // 0000000000000011 <done>:
4447 Label normal;
4448 Label done;
4449
4450 // cmp $0x80000000,%eax
4451 __ cmpl(as_Register(RAX_enc), 0x80000000);
4452
4453 // jne e <normal>
4454 __ jccb(Assembler::notEqual, normal);
4455
4456 // xor %edx,%edx
4457 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4458
4459 // cmp $0xffffffffffffffff,%ecx
4460 __ cmpl($div$$Register, -1);
4461
4462 // je 11 <done>
4463 __ jccb(Assembler::equal, done);
4464
4465 // <normal>
4466 // cltd
4467 __ bind(normal);
4468 __ cdql();
4469
4470 // idivl
4471 // <done>
4472 __ idivl($div$$Register);
4473 __ bind(done);
4474 %}
4475
4476 enc_class cdqq_enc(no_rax_rdx_RegL div)
4477 %{
4478 // Full implementation of Java ldiv and lrem; checks for
4479 // special case as described in JVM spec., p.243 & p.271.
4480 //
4481 // normal case special case
4482 //
4483 // input : rax: dividend min_long
4484 // reg: divisor -1
4485 //
4486 // output: rax: quotient (= rax idiv reg) min_long
4487 // rdx: remainder (= rax irem reg) 0
4488 //
4489 // Code sequnce:
4490 //
4491 // 0: 48 ba 00 00 00 00 00 mov $0x8000000000000000,%rdx
4492 // 7: 00 00 80
4493 // a: 48 39 d0 cmp %rdx,%rax
4494 // d: 75 08 jne 17 <normal>
4495 // f: 33 d2 xor %edx,%edx
4496 // 11: 48 83 f9 ff cmp $0xffffffffffffffff,$div
4497 // 15: 74 05 je 1c <done>
4498 // 0000000000000017 <normal>:
4499 // 17: 48 99 cqto
4500 // 19: 48 f7 f9 idiv $div
4501 // 000000000000001c <done>:
4502 Label normal;
4503 Label done;
4504
4505 // mov $0x8000000000000000,%rdx
4506 __ mov64(as_Register(RDX_enc), 0x8000000000000000);
4507
4508 // cmp %rdx,%rax
4509 __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
4510
4511 // jne 17 <normal>
4512 __ jccb(Assembler::notEqual, normal);
4513
4514 // xor %edx,%edx
4515 __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
4516
4517 // cmp $0xffffffffffffffff,$div
4518 __ cmpq($div$$Register, -1);
4519
4520 // je 1e <done>
4521 __ jccb(Assembler::equal, done);
4522
4523 // <normal>
4524 // cqto
4525 __ bind(normal);
4526 __ cdqq();
4527
4528 // idivq (note: must be emitted by the user of this rule)
4529 // <done>
4530 __ idivq($div$$Register);
4531 __ bind(done);
4532 %}
4533
4534 enc_class clear_avx %{
4535 DEBUG_ONLY(int off0 = __ offset());
4536 if (generate_vzeroupper(Compile::current())) {
4537 // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
4538 // Clear upper bits of YMM registers when current compiled code uses
4539 // wide vectors to avoid AVX <-> SSE transition penalty during call.
4540 __ vzeroupper();
4541 }
4542 DEBUG_ONLY(int off1 = __ offset());
4543 assert(off1 - off0 == clear_avx_size(), "correct size prediction");
4544 %}
4545
4546 enc_class Java_To_Runtime(method meth) %{
4547 __ lea(r10, RuntimeAddress((address)$meth$$method));
4548 __ call(r10);
4549 __ post_call_nop();
4550 %}
4551
4552 enc_class Java_Static_Call(method meth)
4553 %{
4554 // JAVA STATIC CALL
4555 // CALL to fixup routine. Fixup routine uses ScopeDesc info to
4556 // determine who we intended to call.
4557 if (!_method) {
4558 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
4559 } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
4560 // The NOP here is purely to ensure that eliding a call to
4561 // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
4562 __ nop(5);
4563 __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
4564 } else {
4565 int method_index = resolved_method_index(masm);
4566 RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
4567 : static_call_Relocation::spec(method_index);
4568 address mark = __ pc();
4569 int call_offset = __ offset();
4570 __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
4571 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
4572 // Calls of the same statically bound method can share
4573 // a stub to the interpreter.
4574 __ code()->shared_stub_to_interp_for(_method, call_offset);
4575 } else {
4576 // Emit stubs for static call.
4577 address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
4578 __ clear_inst_mark();
4579 if (stub == nullptr) {
4580 ciEnv::current()->record_failure("CodeCache is full");
4581 return;
4582 }
4583 }
4584 }
4585 __ post_call_nop();
4586 %}
4587
4588 enc_class Java_Dynamic_Call(method meth) %{
4589 __ ic_call((address)$meth$$method, resolved_method_index(masm));
4590 __ post_call_nop();
4591 %}
4592
4593 enc_class call_epilog %{
4594 if (VerifyStackAtCalls) {
4595 // Check that stack depth is unchanged: find majik cookie on stack
4596 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
4597 Label L;
4598 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4599 __ jccb(Assembler::equal, L);
4600 // Die if stack mismatch
4601 __ int3();
4602 __ bind(L);
4603 }
4604 %}
4605
4606 %}
4607
4608 //----------FRAME--------------------------------------------------------------
4609 // Definition of frame structure and management information.
4610 //
4611 // S T A C K L A Y O U T Allocators stack-slot number
4612 // | (to get allocators register number
4613 // G Owned by | | v add OptoReg::stack0())
4614 // r CALLER | |
4615 // o | +--------+ pad to even-align allocators stack-slot
4616 // w V | pad0 | numbers; owned by CALLER
4617 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4618 // h ^ | in | 5
4619 // | | args | 4 Holes in incoming args owned by SELF
4620 // | | | | 3
4621 // | | +--------+
4622 // V | | old out| Empty on Intel, window on Sparc
4623 // | old |preserve| Must be even aligned.
4624 // | SP-+--------+----> Matcher::_old_SP, even aligned
4625 // | | in | 3 area for Intel ret address
4626 // Owned by |preserve| Empty on Sparc.
4627 // SELF +--------+
4628 // | | pad2 | 2 pad to align old SP
4629 // | +--------+ 1
4630 // | | locks | 0
4631 // | +--------+----> OptoReg::stack0(), even aligned
4632 // | | pad1 | 11 pad to align new SP
4633 // | +--------+
4634 // | | | 10
4635 // | | spills | 9 spills
4636 // V | | 8 (pad0 slot for callee)
4637 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
4638 // ^ | out | 7
4639 // | | args | 6 Holes in outgoing args owned by CALLEE
4640 // Owned by +--------+
4641 // CALLEE | new out| 6 Empty on Intel, window on Sparc
4642 // | new |preserve| Must be even-aligned.
4643 // | SP-+--------+----> Matcher::_new_SP, even aligned
4644 // | | |
4645 //
4646 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
4647 // known from SELF's arguments and the Java calling convention.
4648 // Region 6-7 is determined per call site.
4649 // Note 2: If the calling convention leaves holes in the incoming argument
4650 // area, those holes are owned by SELF. Holes in the outgoing area
4651 // are owned by the CALLEE. Holes should not be necessary in the
4652 // incoming area, as the Java calling convention is completely under
4653 // the control of the AD file. Doubles can be sorted and packed to
4654 // avoid holes. Holes in the outgoing arguments may be necessary for
4655 // varargs C calling conventions.
4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
4657 // even aligned with pad0 as needed.
4658 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
4659 // region 6-11 is even aligned; it may be padded out more so that
4660 // the region from SP to FP meets the minimum stack alignment.
4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
4662 // alignment. Region 11, pad1, may be dynamically extended so that
4663 // SP meets the minimum alignment.
4664
4665 frame
4666 %{
4667 // These three registers define part of the calling convention
4668 // between compiled code and the interpreter.
4669 inline_cache_reg(RAX); // Inline Cache Register
4670
4671 // Optional: name the operand used by cisc-spilling to access
4672 // [stack_pointer + offset]
4673 cisc_spilling_operand_name(indOffset32);
4674
4675 // Number of stack slots consumed by locking an object
4676 sync_stack_slots(2);
4677
4678 // Compiled code's Frame Pointer
4679 frame_pointer(RSP);
4680
4681 // Stack alignment requirement
4682 stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
4683
4684 // Number of outgoing stack slots killed above the out_preserve_stack_slots
4685 // for calls to C. Supports the var-args backing area for register parms.
4686 varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
4687
4688 // The after-PROLOG location of the return address. Location of
4689 // return address specifies a type (REG or STACK) and a number
4690 // representing the register number (i.e. - use a register name) or
4691 // stack slot.
4692 // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4693 // Otherwise, it is above the locks and verification slot and alignment word
4694 return_addr(STACK - 2 +
4695 align_up((Compile::current()->in_preserve_stack_slots() +
4696 Compile::current()->fixed_slots()),
4697 stack_alignment_in_slots()));
4698
4699 // Location of compiled Java return values. Same as C for now.
4700 return_value
4701 %{
4702 assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
4703 "only return normal values");
4704
4705 static const int lo[Op_RegL + 1] = {
4706 0,
4707 0,
4708 RAX_num, // Op_RegN
4709 RAX_num, // Op_RegI
4710 RAX_num, // Op_RegP
4711 XMM0_num, // Op_RegF
4712 XMM0_num, // Op_RegD
4713 RAX_num // Op_RegL
4714 };
4715 static const int hi[Op_RegL + 1] = {
4716 0,
4717 0,
4718 OptoReg::Bad, // Op_RegN
4719 OptoReg::Bad, // Op_RegI
4720 RAX_H_num, // Op_RegP
4721 OptoReg::Bad, // Op_RegF
4722 XMM0b_num, // Op_RegD
4723 RAX_H_num // Op_RegL
4724 };
4725 // Excluded flags and vector registers.
4726 assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
4727 return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
4728 %}
4729 %}
4730
4731 //----------ATTRIBUTES---------------------------------------------------------
4732 //----------Operand Attributes-------------------------------------------------
4733 op_attrib op_cost(0); // Required cost attribute
4734
4735 //----------Instruction Attributes---------------------------------------------
4736 ins_attrib ins_cost(100); // Required cost attribute
4737 ins_attrib ins_size(8); // Required size attribute (in bits)
4738 ins_attrib ins_short_branch(0); // Required flag: is this instruction
4739 // a non-matching short branch variant
4740 // of some long branch?
4741 ins_attrib ins_alignment(1); // Required alignment attribute (must
4742 // be a power of 2) specifies the
4743 // alignment that some part of the
4744 // instruction (not necessarily the
4745 // start) requires. If > 1, a
4746 // compute_padding() function must be
4747 // provided for the instruction
4748
4749 // Whether this node is expanded during code emission into a sequence of
4750 // instructions and the first instruction can perform an implicit null check.
4751 ins_attrib ins_is_late_expanded_null_check_candidate(false);
4752
4753 //----------OPERANDS-----------------------------------------------------------
4754 // Operand definitions must precede instruction definitions for correct parsing
4755 // in the ADLC because operands constitute user defined types which are used in
4756 // instruction definitions.
4757
4758 //----------Simple Operands----------------------------------------------------
4759 // Immediate Operands
4760 // Integer Immediate
4761 operand immI()
4762 %{
4763 match(ConI);
4764
4765 op_cost(10);
4766 format %{ %}
4767 interface(CONST_INTER);
4768 %}
4769
4770 // Constant for test vs zero
4771 operand immI_0()
4772 %{
4773 predicate(n->get_int() == 0);
4774 match(ConI);
4775
4776 op_cost(0);
4777 format %{ %}
4778 interface(CONST_INTER);
4779 %}
4780
4781 // Constant for increment
4782 operand immI_1()
4783 %{
4784 predicate(n->get_int() == 1);
4785 match(ConI);
4786
4787 op_cost(0);
4788 format %{ %}
4789 interface(CONST_INTER);
4790 %}
4791
4792 // Constant for decrement
4793 operand immI_M1()
4794 %{
4795 predicate(n->get_int() == -1);
4796 match(ConI);
4797
4798 op_cost(0);
4799 format %{ %}
4800 interface(CONST_INTER);
4801 %}
4802
4803 operand immI_2()
4804 %{
4805 predicate(n->get_int() == 2);
4806 match(ConI);
4807
4808 op_cost(0);
4809 format %{ %}
4810 interface(CONST_INTER);
4811 %}
4812
4813 operand immI_4()
4814 %{
4815 predicate(n->get_int() == 4);
4816 match(ConI);
4817
4818 op_cost(0);
4819 format %{ %}
4820 interface(CONST_INTER);
4821 %}
4822
4823 operand immI_8()
4824 %{
4825 predicate(n->get_int() == 8);
4826 match(ConI);
4827
4828 op_cost(0);
4829 format %{ %}
4830 interface(CONST_INTER);
4831 %}
4832
4833 // Valid scale values for addressing modes
4834 operand immI2()
4835 %{
4836 predicate(0 <= n->get_int() && (n->get_int() <= 3));
4837 match(ConI);
4838
4839 format %{ %}
4840 interface(CONST_INTER);
4841 %}
4842
4843 operand immU7()
4844 %{
4845 predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
4846 match(ConI);
4847
4848 op_cost(5);
4849 format %{ %}
4850 interface(CONST_INTER);
4851 %}
4852
4853 operand immI8()
4854 %{
4855 predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
4856 match(ConI);
4857
4858 op_cost(5);
4859 format %{ %}
4860 interface(CONST_INTER);
4861 %}
4862
4863 operand immU8()
4864 %{
4865 predicate((0 <= n->get_int()) && (n->get_int() <= 255));
4866 match(ConI);
4867
4868 op_cost(5);
4869 format %{ %}
4870 interface(CONST_INTER);
4871 %}
4872
4873 operand immI16()
4874 %{
4875 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4876 match(ConI);
4877
4878 op_cost(10);
4879 format %{ %}
4880 interface(CONST_INTER);
4881 %}
4882
4883 // Int Immediate non-negative
4884 operand immU31()
4885 %{
4886 predicate(n->get_int() >= 0);
4887 match(ConI);
4888
4889 op_cost(0);
4890 format %{ %}
4891 interface(CONST_INTER);
4892 %}
4893
4894 // Pointer Immediate
4895 operand immP()
4896 %{
4897 match(ConP);
4898
4899 op_cost(10);
4900 format %{ %}
4901 interface(CONST_INTER);
4902 %}
4903
4904 // Null Pointer Immediate
4905 operand immP0()
4906 %{
4907 predicate(n->get_ptr() == 0);
4908 match(ConP);
4909
4910 op_cost(5);
4911 format %{ %}
4912 interface(CONST_INTER);
4913 %}
4914
4915 // Pointer Immediate
4916 operand immN() %{
4917 match(ConN);
4918
4919 op_cost(10);
4920 format %{ %}
4921 interface(CONST_INTER);
4922 %}
4923
4924 operand immNKlass() %{
4925 match(ConNKlass);
4926
4927 op_cost(10);
4928 format %{ %}
4929 interface(CONST_INTER);
4930 %}
4931
4932 // Null Pointer Immediate
4933 operand immN0() %{
4934 predicate(n->get_narrowcon() == 0);
4935 match(ConN);
4936
4937 op_cost(5);
4938 format %{ %}
4939 interface(CONST_INTER);
4940 %}
4941
4942 operand immP31()
4943 %{
4944 predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
4945 && (n->get_ptr() >> 31) == 0);
4946 match(ConP);
4947
4948 op_cost(5);
4949 format %{ %}
4950 interface(CONST_INTER);
4951 %}
4952
4953
4954 // Long Immediate
4955 operand immL()
4956 %{
4957 match(ConL);
4958
4959 op_cost(20);
4960 format %{ %}
4961 interface(CONST_INTER);
4962 %}
4963
4964 // Long Immediate 8-bit
4965 operand immL8()
4966 %{
4967 predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
4968 match(ConL);
4969
4970 op_cost(5);
4971 format %{ %}
4972 interface(CONST_INTER);
4973 %}
4974
4975 // Long Immediate 32-bit unsigned
4976 operand immUL32()
4977 %{
4978 predicate(n->get_long() == (unsigned int) (n->get_long()));
4979 match(ConL);
4980
4981 op_cost(10);
4982 format %{ %}
4983 interface(CONST_INTER);
4984 %}
4985
4986 // Long Immediate 32-bit signed
4987 operand immL32()
4988 %{
4989 predicate(n->get_long() == (int) (n->get_long()));
4990 match(ConL);
4991
4992 op_cost(15);
4993 format %{ %}
4994 interface(CONST_INTER);
4995 %}
4996
4997 operand immL_Pow2()
4998 %{
4999 predicate(is_power_of_2((julong)n->get_long()));
5000 match(ConL);
5001
5002 op_cost(15);
5003 format %{ %}
5004 interface(CONST_INTER);
5005 %}
5006
5007 operand immL_NotPow2()
5008 %{
5009 predicate(is_power_of_2((julong)~n->get_long()));
5010 match(ConL);
5011
5012 op_cost(15);
5013 format %{ %}
5014 interface(CONST_INTER);
5015 %}
5016
5017 // Long Immediate zero
5018 operand immL0()
5019 %{
5020 predicate(n->get_long() == 0L);
5021 match(ConL);
5022
5023 op_cost(10);
5024 format %{ %}
5025 interface(CONST_INTER);
5026 %}
5027
5028 // Constant for increment
5029 operand immL1()
5030 %{
5031 predicate(n->get_long() == 1);
5032 match(ConL);
5033
5034 format %{ %}
5035 interface(CONST_INTER);
5036 %}
5037
5038 // Constant for decrement
5039 operand immL_M1()
5040 %{
5041 predicate(n->get_long() == -1);
5042 match(ConL);
5043
5044 format %{ %}
5045 interface(CONST_INTER);
5046 %}
5047
5048 // Long Immediate: low 32-bit mask
5049 operand immL_32bits()
5050 %{
5051 predicate(n->get_long() == 0xFFFFFFFFL);
5052 match(ConL);
5053 op_cost(20);
5054
5055 format %{ %}
5056 interface(CONST_INTER);
5057 %}
5058
5059 // Int Immediate: 2^n-1, positive
5060 operand immI_Pow2M1()
5061 %{
5062 predicate((n->get_int() > 0)
5063 && is_power_of_2((juint)n->get_int() + 1));
5064 match(ConI);
5065
5066 op_cost(20);
5067 format %{ %}
5068 interface(CONST_INTER);
5069 %}
5070
5071 // Float Immediate zero
5072 operand immF0()
5073 %{
5074 predicate(jint_cast(n->getf()) == 0);
5075 match(ConF);
5076
5077 op_cost(5);
5078 format %{ %}
5079 interface(CONST_INTER);
5080 %}
5081
5082 // Float Immediate
5083 operand immF()
5084 %{
5085 match(ConF);
5086
5087 op_cost(15);
5088 format %{ %}
5089 interface(CONST_INTER);
5090 %}
5091
5092 // Half Float Immediate
5093 operand immH()
5094 %{
5095 match(ConH);
5096
5097 op_cost(15);
5098 format %{ %}
5099 interface(CONST_INTER);
5100 %}
5101
5102 // Double Immediate zero
5103 operand immD0()
5104 %{
5105 predicate(jlong_cast(n->getd()) == 0);
5106 match(ConD);
5107
5108 op_cost(5);
5109 format %{ %}
5110 interface(CONST_INTER);
5111 %}
5112
5113 // Double Immediate
5114 operand immD()
5115 %{
5116 match(ConD);
5117
5118 op_cost(15);
5119 format %{ %}
5120 interface(CONST_INTER);
5121 %}
5122
5123 // Immediates for special shifts (sign extend)
5124
5125 // Constants for increment
5126 operand immI_16()
5127 %{
5128 predicate(n->get_int() == 16);
5129 match(ConI);
5130
5131 format %{ %}
5132 interface(CONST_INTER);
5133 %}
5134
5135 operand immI_24()
5136 %{
5137 predicate(n->get_int() == 24);
5138 match(ConI);
5139
5140 format %{ %}
5141 interface(CONST_INTER);
5142 %}
5143
5144 // Constant for byte-wide masking
5145 operand immI_255()
5146 %{
5147 predicate(n->get_int() == 255);
5148 match(ConI);
5149
5150 format %{ %}
5151 interface(CONST_INTER);
5152 %}
5153
5154 // Constant for short-wide masking
5155 operand immI_65535()
5156 %{
5157 predicate(n->get_int() == 65535);
5158 match(ConI);
5159
5160 format %{ %}
5161 interface(CONST_INTER);
5162 %}
5163
5164 // Constant for byte-wide masking
5165 operand immL_255()
5166 %{
5167 predicate(n->get_long() == 255);
5168 match(ConL);
5169
5170 format %{ %}
5171 interface(CONST_INTER);
5172 %}
5173
5174 // Constant for short-wide masking
5175 operand immL_65535()
5176 %{
5177 predicate(n->get_long() == 65535);
5178 match(ConL);
5179
5180 format %{ %}
5181 interface(CONST_INTER);
5182 %}
5183
5184 // AOT Runtime Constants Address
5185 operand immAOTRuntimeConstantsAddress()
5186 %{
5187 // Check if the address is in the range of AOT Runtime Constants
5188 predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
5189 match(ConP);
5190
5191 op_cost(0);
5192 format %{ %}
5193 interface(CONST_INTER);
5194 %}
5195
// Vector mask register operand: matches RegVectMask and is allocated from
// register class vectmask_reg.
5196 operand kReg()
5197 %{
5198 constraint(ALLOC_IN_RC(vectmask_reg));
5199 match(RegVectMask);
5200 format %{%}
5201 interface(REG_INTER);
5202 %}
5203
5204 // Register Operands
5205 // Integer Register
// Allocated from register class int_reg. Besides RegI, the match rules accept
// the fixed-register operands rax/rbx/rcx/rdx/rdi_RegI.
5206 operand rRegI()
5207 %{
5208 constraint(ALLOC_IN_RC(int_reg));
5209 match(RegI);
5210
5211 match(rax_RegI);
5212 match(rbx_RegI);
5213 match(rcx_RegI);
5214 match(rdx_RegI);
5215 match(rdi_RegI);
5216
5217 format %{ %}
5218 interface(REG_INTER);
5219 %}
5220
5221 // Special Registers
// Integer operand constrained to register class int_rax_reg; matches RegI and
// rRegI and is printed as "RAX".
5222 operand rax_RegI()
5223 %{
5224 constraint(ALLOC_IN_RC(int_rax_reg));
5225 match(RegI);
5226 match(rRegI);
5227
5228 format %{ "RAX" %}
5229 interface(REG_INTER);
5230 %}
5231
5232 // Special Registers
// Integer operand constrained to register class int_rbx_reg; matches RegI and
// rRegI and is printed as "RBX".
5233 operand rbx_RegI()
5234 %{
5235 constraint(ALLOC_IN_RC(int_rbx_reg));
5236 match(RegI);
5237 match(rRegI);
5238
5239 format %{ "RBX" %}
5240 interface(REG_INTER);
5241 %}
5242
// Integer operand constrained to register class int_rcx_reg; matches RegI and
// rRegI and is printed as "RCX".
5243 operand rcx_RegI()
5244 %{
5245 constraint(ALLOC_IN_RC(int_rcx_reg));
5246 match(RegI);
5247 match(rRegI);
5248
5249 format %{ "RCX" %}
5250 interface(REG_INTER);
5251 %}
5252
// Integer operand constrained to register class int_rdx_reg; matches RegI and
// rRegI and is printed as "RDX".
5253 operand rdx_RegI()
5254 %{
5255 constraint(ALLOC_IN_RC(int_rdx_reg));
5256 match(RegI);
5257 match(rRegI);
5258
5259 format %{ "RDX" %}
5260 interface(REG_INTER);
5261 %}
5262
// Integer operand constrained to register class int_rdi_reg; matches RegI and
// rRegI and is printed as "RDI".
5263 operand rdi_RegI()
5264 %{
5265 constraint(ALLOC_IN_RC(int_rdi_reg));
5266 match(RegI);
5267 match(rRegI);
5268
5269 format %{ "RDI" %}
5270 interface(REG_INTER);
5271 %}
5272
// Integer operand allocated from register class int_no_rax_rdx_reg. Its match
// rules accept RegI plus rbx/rcx/rdi_RegI; rax_RegI and rdx_RegI are not listed.
5273 operand no_rax_rdx_RegI()
5274 %{
5275 constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
5276 match(RegI);
5277 match(rbx_RegI);
5278 match(rcx_RegI);
5279 match(rdi_RegI);
5280
5281 format %{ %}
5282 interface(REG_INTER);
5283 %}
5284
// Integer operand allocated from register class int_no_rbp_r13_reg. Its match
// rules accept RegI, rRegI and the fixed operands rax/rbx/rcx/rdx/rdi_RegI.
5285 operand no_rbp_r13_RegI()
5286 %{
5287 constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
5288 match(RegI);
5289 match(rRegI);
5290 match(rax_RegI);
5291 match(rbx_RegI);
5292 match(rcx_RegI);
5293 match(rdx_RegI);
5294 match(rdi_RegI);
5295
5296 format %{ %}
5297 interface(REG_INTER);
5298 %}
5299
5300 // Pointer Register
// Allocated from register class any_reg. Accepts RegP, rRegP and every
// fixed pointer operand listed below, including rbp_RegP and r15_RegP.
5301 operand any_RegP()
5302 %{
5303 constraint(ALLOC_IN_RC(any_reg));
5304 match(RegP);
5305 match(rax_RegP);
5306 match(rbx_RegP);
5307 match(rdi_RegP);
5308 match(rsi_RegP);
5309 match(rbp_RegP);
5310 match(r15_RegP);
5311 match(rRegP);
5312
5313 format %{ %}
5314 interface(REG_INTER);
5315 %}
5316
// General pointer register operand, allocated from register class ptr_reg.
// Accepts RegP and the fixed pointer operands rax/rbx/rdi/rsi/rbp/r15_RegP.
5317 operand rRegP()
5318 %{
5319 constraint(ALLOC_IN_RC(ptr_reg));
5320 match(RegP);
5321 match(rax_RegP);
5322 match(rbx_RegP);
5323 match(rdi_RegP);
5324 match(rsi_RegP);
5325 match(rbp_RegP); // See Q&A below about
5326 match(r15_RegP); // r15_RegP and rbp_RegP.
5327
5328 format %{ %}
5329 interface(REG_INTER);
5330 %}
5331
// Narrow (compressed) pointer register operand: matches RegN and is allocated
// from the integer register class int_reg.
5332 operand rRegN() %{
5333 constraint(ALLOC_IN_RC(int_reg));
5334 match(RegN);
5335
5336 format %{ %}
5337 interface(REG_INTER);
5338 %}
5339
5340 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
5341 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
5342 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
5343 // The output of an instruction is controlled by the allocator, which respects
5344 // register class masks, not match rules. Unless an instruction mentions
5345 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
5346 // by the allocator as an input.
5347 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
5348 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
5349 // result, RBP is not included in the output of the instruction either.
5350
5351 // This operand is not allowed to use RBP even if
5352 // RBP is not used to hold the frame pointer.
// Allocated from register class ptr_reg_no_rbp; accepts RegP and the fixed
// operands rbx/rsi/rdi_RegP.
5353 operand no_rbp_RegP()
5354 %{
5355 constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
5356 match(RegP);
5357 match(rbx_RegP);
5358 match(rsi_RegP);
5359 match(rdi_RegP);
5360
5361 format %{ %}
5362 interface(REG_INTER);
5363 %}
5364
5365 // Special Registers
5366 // Return a pointer value
// Pointer operand constrained to register class ptr_rax_reg; matches RegP and rRegP.
5367 operand rax_RegP()
5368 %{
5369 constraint(ALLOC_IN_RC(ptr_rax_reg));
5370 match(RegP);
5371 match(rRegP);
5372
5373 format %{ %}
5374 interface(REG_INTER);
5375 %}
5376
5377 // Special Registers
5378 // Return a compressed pointer value
// Narrow pointer operand constrained to the integer class int_rax_reg;
// matches RegN and rRegN.
5379 operand rax_RegN()
5380 %{
5381 constraint(ALLOC_IN_RC(int_rax_reg));
5382 match(RegN);
5383 match(rRegN);
5384
5385 format %{ %}
5386 interface(REG_INTER);
5387 %}
5388
5389 // Used in AtomicAdd
// Pointer operand constrained to register class ptr_rbx_reg; matches RegP and rRegP.
5390 operand rbx_RegP()
5391 %{
5392 constraint(ALLOC_IN_RC(ptr_rbx_reg));
5393 match(RegP);
5394 match(rRegP);
5395
5396 format %{ %}
5397 interface(REG_INTER);
5398 %}
5399
// Pointer operand constrained to register class ptr_rsi_reg; matches RegP and rRegP.
5400 operand rsi_RegP()
5401 %{
5402 constraint(ALLOC_IN_RC(ptr_rsi_reg));
5403 match(RegP);
5404 match(rRegP);
5405
5406 format %{ %}
5407 interface(REG_INTER);
5408 %}
5409
// Pointer operand constrained to register class ptr_rbp_reg; matches RegP and rRegP.
5410 operand rbp_RegP()
5411 %{
5412 constraint(ALLOC_IN_RC(ptr_rbp_reg));
5413 match(RegP);
5414 match(rRegP);
5415
5416 format %{ %}
5417 interface(REG_INTER);
5418 %}
5419
5420 // Used in rep stosq
// Pointer operand constrained to register class ptr_rdi_reg; matches RegP and rRegP.
5421 operand rdi_RegP()
5422 %{
5423 constraint(ALLOC_IN_RC(ptr_rdi_reg));
5424 match(RegP);
5425 match(rRegP);
5426
5427 format %{ %}
5428 interface(REG_INTER);
5429 %}
5430
// Pointer operand constrained to register class ptr_r15_reg; matches RegP and
// rRegP. The Q&A comment earlier in this file describes r15 as the read-only
// TLS register.
5431 operand r15_RegP()
5432 %{
5433 constraint(ALLOC_IN_RC(ptr_r15_reg));
5434 match(RegP);
5435 match(rRegP);
5436
5437 format %{ %}
5438 interface(REG_INTER);
5439 %}
5440
// General long register operand, allocated from register class long_reg.
// Accepts RegL plus the fixed operands rax_RegL and rdx_RegL.
5441 operand rRegL()
5442 %{
5443 constraint(ALLOC_IN_RC(long_reg));
5444 match(RegL);
5445 match(rax_RegL);
5446 match(rdx_RegL);
5447
5448 format %{ %}
5449 interface(REG_INTER);
5450 %}
5451
5452 // Special Registers
// Long operand allocated from register class long_no_rax_rdx_reg; matches
// RegL and rRegL.
5453 operand no_rax_rdx_RegL()
5454 %{
5455 constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
5456 match(RegL);
5457 match(rRegL);
5458
5459 format %{ %}
5460 interface(REG_INTER);
5461 %}
5462
// Long operand constrained to register class long_rax_reg; matches RegL and
// rRegL and is printed as "RAX".
5463 operand rax_RegL()
5464 %{
5465 constraint(ALLOC_IN_RC(long_rax_reg));
5466 match(RegL);
5467 match(rRegL);
5468
5469 format %{ "RAX" %}
5470 interface(REG_INTER);
5471 %}
5472
// Long operand constrained to register class long_rcx_reg; matches RegL and rRegL.
5473 operand rcx_RegL()
5474 %{
5475 constraint(ALLOC_IN_RC(long_rcx_reg));
5476 match(RegL);
5477 match(rRegL);
5478
5479 format %{ %}
5480 interface(REG_INTER);
5481 %}
5482
// Long operand constrained to register class long_rdx_reg; matches RegL and rRegL.
5483 operand rdx_RegL()
5484 %{
5485 constraint(ALLOC_IN_RC(long_rdx_reg));
5486 match(RegL);
5487 match(rRegL);
5488
5489 format %{ %}
5490 interface(REG_INTER);
5491 %}
5492
// Long operand constrained to register class long_r11_reg; matches RegL and rRegL.
5493 operand r11_RegL()
5494 %{
5495 constraint(ALLOC_IN_RC(long_r11_reg));
5496 match(RegL);
5497 match(rRegL);
5498
5499 format %{ %}
5500 interface(REG_INTER);
5501 %}
5502
// Long operand allocated from register class long_no_rbp_r13_reg. Its match
// rules accept RegL, rRegL and the fixed operands rax/rcx/rdx_RegL.
5503 operand no_rbp_r13_RegL()
5504 %{
5505 constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
5506 match(RegL);
5507 match(rRegL);
5508 match(rax_RegL);
5509 match(rcx_RegL);
5510 match(rdx_RegL);
5511
5512 format %{ %}
5513 interface(REG_INTER);
5514 %}
5515
5516 // Flags register, used as output of compare instructions
// Matches RegFlags in register class int_flags; printed as "RFLAGS".
5517 operand rFlagsReg()
5518 %{
5519 constraint(ALLOC_IN_RC(int_flags));
5520 match(RegFlags);
5521
5522 format %{ "RFLAGS" %}
5523 interface(REG_INTER);
5524 %}
5525
5526 // Flags register, used as output of FLOATING POINT compare instructions
// Same register class (int_flags) and match rule as rFlagsReg; it is a
// distinct operand type and is printed as "RFLAGS_U".
5527 operand rFlagsRegU()
5528 %{
5529 constraint(ALLOC_IN_RC(int_flags));
5530 match(RegFlags);
5531
5532 format %{ "RFLAGS_U" %}
5533 interface(REG_INTER);
5534 %}
5535
// Flags operand (class int_flags, printed as "RFLAGS_U_CF") that is enabled
// only when UseAPX is off or AVX10.2 is not supported. Its predicate is the
// exact complement of rFlagsRegUCFE's.
5536 operand rFlagsRegUCF() %{
5537 constraint(ALLOC_IN_RC(int_flags));
5538 match(RegFlags);
5539 predicate(!UseAPX || !VM_Version::supports_avx10_2());
5540
5541 format %{ "RFLAGS_U_CF" %}
5542 interface(REG_INTER);
5543 %}
5544
// Flags operand (class int_flags, printed as "RFLAGS_U_CFE") that is enabled
// only when UseAPX is on and AVX10.2 is supported. Its predicate is the
// exact complement of rFlagsRegUCF's.
5545 operand rFlagsRegUCFE() %{
5546 constraint(ALLOC_IN_RC(int_flags));
5547 match(RegFlags);
5548 predicate(UseAPX && VM_Version::supports_avx10_2());
5549
5550 format %{ "RFLAGS_U_CFE" %}
5551 interface(REG_INTER);
5552 %}
5553
5554 // Float register operands
// Matches RegF; allocated from register class float_reg.
5555 operand regF() %{
5556 constraint(ALLOC_IN_RC(float_reg));
5557 match(RegF);
5558
5559 format %{ %}
5560 interface(REG_INTER);
5561 %}
5562
5563 // Float register operands
// Matches RegF; allocated from register class float_reg_legacy.
5564 operand legRegF() %{
5565 constraint(ALLOC_IN_RC(float_reg_legacy));
5566 match(RegF);
5567
5568 format %{ %}
5569 interface(REG_INTER);
5570 %}
5571
5572 // Float register operands
// Matches RegF; allocated from register class float_reg_vl.
5573 operand vlRegF() %{
5574 constraint(ALLOC_IN_RC(float_reg_vl));
5575 match(RegF);
5576
5577 format %{ %}
5578 interface(REG_INTER);
5579 %}
5580
5581 // Double register operands
// Matches RegD; allocated from register class double_reg.
5582 operand regD() %{
5583 constraint(ALLOC_IN_RC(double_reg));
5584 match(RegD);
5585
5586 format %{ %}
5587 interface(REG_INTER);
5588 %}
5589
5590 // Double register operands
// Matches RegD; allocated from register class double_reg_legacy.
5591 operand legRegD() %{
5592 constraint(ALLOC_IN_RC(double_reg_legacy));
5593 match(RegD);
5594
5595 format %{ %}
5596 interface(REG_INTER);
5597 %}
5598
5599 // Double register operands
// Matches RegD; allocated from register class double_reg_vl.
5600 operand vlRegD() %{
5601 constraint(ALLOC_IN_RC(double_reg_vl));
5602 match(RegD);
5603
5604 format %{ %}
5605 interface(REG_INTER);
5606 %}
5607
5608 //----------Memory Operands----------------------------------------------------
5609 // Direct Memory Operand
5610 // operand direct(immP addr)
5611 // %{
5612 // match(addr);
5613
5614 // format %{ "[$addr]" %}
5615 // interface(MEMORY_INTER) %{
5616 // base(0xFFFFFFFF);
5617 // index(0x4);
5618 // scale(0x0);
5619 // disp($addr);
5620 // %}
5621 // %}
5622
5623 // Indirect Memory Operand
// Address form [$reg]: base is the pointer register, index is 0x4 (the
// "no index" value used by the stack-slot operands in this file), scale 0,
// displacement 0.
5624 operand indirect(any_RegP reg)
5625 %{
5626 constraint(ALLOC_IN_RC(ptr_reg));
5627 match(reg);
5628
5629 format %{ "[$reg]" %}
5630 interface(MEMORY_INTER) %{
5631 base($reg);
5632 index(0x4);
5633 scale(0x0);
5634 disp(0x0);
5635 %}
5636 %}
5637
5638 // Indirect Memory Plus Short Offset Operand
// Matches (AddP reg off) where off is an immL8; address form [$reg + $off]
// with no index (0x4) and scale 0.
5639 operand indOffset8(any_RegP reg, immL8 off)
5640 %{
5641 constraint(ALLOC_IN_RC(ptr_reg));
5642 match(AddP reg off);
5643
5644 format %{ "[$reg + $off (8-bit)]" %}
5645 interface(MEMORY_INTER) %{
5646 base($reg);
5647 index(0x4);
5648 scale(0x0);
5649 disp($off);
5650 %}
5651 %}
5652
5653 // Indirect Memory Plus Long Offset Operand
// Matches (AddP reg off) where off is an immL32; address form [$reg + $off]
// with no index (0x4) and scale 0.
5654 operand indOffset32(any_RegP reg, immL32 off)
5655 %{
5656 constraint(ALLOC_IN_RC(ptr_reg));
5657 match(AddP reg off);
5658
5659 format %{ "[$reg + $off (32-bit)]" %}
5660 interface(MEMORY_INTER) %{
5661 base($reg);
5662 index(0x4);
5663 scale(0x0);
5664 disp($off);
5665 %}
5666 %}
5667
5668 // Indirect Memory Plus Index Register Plus Offset Operand
// Matches (AddP (AddP reg lreg) off): base $reg, unscaled long index $lreg,
// 32-bit displacement $off. op_cost is 10.
5669 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
5670 %{
5671 constraint(ALLOC_IN_RC(ptr_reg));
5672 match(AddP (AddP reg lreg) off);
5673
5674 op_cost(10);
5675 format %{"[$reg + $off + $lreg]" %}
5676 interface(MEMORY_INTER) %{
5677 base($reg);
5678 index($lreg);
5679 scale(0x0);
5680 disp($off);
5681 %}
5682 %}
5683
5684 // Indirect Memory Plus Index Register Operand
// Matches (AddP reg lreg): base $reg, unscaled long index $lreg, and a zero
// displacement (this form has no offset). op_cost is 10.
5685 operand indIndex(any_RegP reg, rRegL lreg)
5686 %{
5687 constraint(ALLOC_IN_RC(ptr_reg));
5688 match(AddP reg lreg);
5689
5690 op_cost(10);
5691 format %{"[$reg + $lreg]" %}
5692 interface(MEMORY_INTER) %{
5693 base($reg);
5694 index($lreg);
5695 scale(0x0);
5696 disp(0x0);
5697 %}
5698 %}
5699
5700 // Indirect Memory Times Scale Plus Index Register
// Matches (AddP reg (LShiftL lreg scale)): base $reg, long index $lreg shifted
// by the immI2 constant $scale, zero displacement. op_cost is 10.
5701 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
5702 %{
5703 constraint(ALLOC_IN_RC(ptr_reg));
5704 match(AddP reg (LShiftL lreg scale));
5705
5706 op_cost(10);
5707 format %{"[$reg + $lreg << $scale]" %}
5708 interface(MEMORY_INTER) %{
5709 base($reg);
5710 index($lreg);
5711 scale($scale);
5712 disp(0x0);
5713 %}
5714 %}
5715
// Indirect Memory Times Scale Plus Positive Index Register
// Matches (AddP reg (LShiftL (ConvI2L idx) scale)) with an int index register.
// The predicate requires the long type of n->in(3)->in(1) -- presumably the
// ConvI2L node of the match tree -- to have a lower bound _lo >= 0.
5716 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
5717 %{
5718 constraint(ALLOC_IN_RC(ptr_reg));
5719 predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5720 match(AddP reg (LShiftL (ConvI2L idx) scale));
5721
5722 op_cost(10);
5723 format %{"[$reg + pos $idx << $scale]" %}
5724 interface(MEMORY_INTER) %{
5725 base($reg);
5726 index($idx);
5727 scale($scale);
5728 disp(0x0);
5729 %}
5730 %}
5731
5732 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Matches (AddP (AddP reg (LShiftL lreg scale)) off): base $reg, long index
// $lreg shifted by $scale, 32-bit displacement $off. op_cost is 10.
5733 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
5734 %{
5735 constraint(ALLOC_IN_RC(ptr_reg));
5736 match(AddP (AddP reg (LShiftL lreg scale)) off);
5737
5738 op_cost(10);
5739 format %{"[$reg + $off + $lreg << $scale]" %}
5740 interface(MEMORY_INTER) %{
5741 base($reg);
5742 index($lreg);
5743 scale($scale);
5744 disp($off);
5745 %}
5746 %}
5747
5748 // Indirect Memory Plus Positive Index Register Plus Offset Operand
// Matches (AddP (AddP reg (ConvI2L idx)) off) with an int index register.
// The predicate requires the long type of n->in(2)->in(3) -- presumably the
// ConvI2L node of the match tree -- to have a lower bound _lo >= 0.
5749 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
5750 %{
5751 constraint(ALLOC_IN_RC(ptr_reg));
5752 predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5753 match(AddP (AddP reg (ConvI2L idx)) off);
5754
5755 op_cost(10);
5756 format %{"[$reg + $off + $idx]" %}
5757 interface(MEMORY_INTER) %{
5758 base($reg);
5759 index($idx);
5760 scale(0x0);
5761 disp($off);
5762 %}
5763 %}
5764
5765 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Matches (AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off) with an int index.
// The predicate requires the long type of n->in(2)->in(3)->in(1) -- presumably
// the ConvI2L node of the match tree -- to have a lower bound _lo >= 0.
5766 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
5767 %{
5768 constraint(ALLOC_IN_RC(ptr_reg));
5769 predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5770 match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);
5771
5772 op_cost(10);
5773 format %{"[$reg + $off + $idx << $scale]" %}
5774 interface(MEMORY_INTER) %{
5775 base($reg);
5776 index($idx);
5777 scale($scale);
5778 disp($off);
5779 %}
5780 %}
5781
5782 // Indirect Narrow Oop Plus Offset Operand
5783 // Note: x86 architecture doesn't support "scale * index + offset" without a base
5784 // we can't free r12 even with CompressedOops::base() == nullptr.
// Enabled only when UseCompressedOops is set and the compressed-oop shift
// equals Address::times_8. Matches (AddP (DecodeN reg) off) and encodes it as
// base 0xc (R12), index = the narrow register, scale 3, displacement $off.
5785 operand indCompressedOopOffset(rRegN reg, immL32 off) %{
5786 predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
5787 constraint(ALLOC_IN_RC(ptr_reg));
5788 match(AddP (DecodeN reg) off);
5789
5790 op_cost(10);
5791 format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
5792 interface(MEMORY_INTER) %{
5793 base(0xc); // R12
5794 index($reg);
5795 scale(0x3);
5796 disp($off);
5797 %}
5798 %}
5799
5800 // Indirect Memory Operand
// Narrow-oop variant of indirect: matches (DecodeN reg) and uses the narrow
// register directly as the base. Enabled only when CompressedOops::shift() == 0.
5801 operand indirectNarrow(rRegN reg)
5802 %{
5803 predicate(CompressedOops::shift() == 0);
5804 constraint(ALLOC_IN_RC(ptr_reg));
5805 match(DecodeN reg);
5806
5807 format %{ "[$reg]" %}
5808 interface(MEMORY_INTER) %{
5809 base($reg);
5810 index(0x4);
5811 scale(0x0);
5812 disp(0x0);
5813 %}
5814 %}
5815
5816 // Indirect Memory Plus Short Offset Operand
// Narrow-oop variant of indOffset8: matches (AddP (DecodeN reg) off) with an
// immL8 offset. Enabled only when CompressedOops::shift() == 0.
5817 operand indOffset8Narrow(rRegN reg, immL8 off)
5818 %{
5819 predicate(CompressedOops::shift() == 0);
5820 constraint(ALLOC_IN_RC(ptr_reg));
5821 match(AddP (DecodeN reg) off);
5822
5823 format %{ "[$reg + $off (8-bit)]" %}
5824 interface(MEMORY_INTER) %{
5825 base($reg);
5826 index(0x4);
5827 scale(0x0);
5828 disp($off);
5829 %}
5830 %}
5831
5832 // Indirect Memory Plus Long Offset Operand
// Narrow-oop variant of indOffset32: matches (AddP (DecodeN reg) off) with an
// immL32 offset. Enabled only when CompressedOops::shift() == 0.
5833 operand indOffset32Narrow(rRegN reg, immL32 off)
5834 %{
5835 predicate(CompressedOops::shift() == 0);
5836 constraint(ALLOC_IN_RC(ptr_reg));
5837 match(AddP (DecodeN reg) off);
5838
5839 format %{ "[$reg + $off (32-bit)]" %}
5840 interface(MEMORY_INTER) %{
5841 base($reg);
5842 index(0x4);
5843 scale(0x0);
5844 disp($off);
5845 %}
5846 %}
5847
5848 // Indirect Memory Plus Index Register Plus Offset Operand
// Narrow-oop variant of indIndexOffset: matches
// (AddP (AddP (DecodeN reg) lreg) off). Enabled only when
// CompressedOops::shift() == 0. op_cost is 10.
5849 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
5850 %{
5851 predicate(CompressedOops::shift() == 0);
5852 constraint(ALLOC_IN_RC(ptr_reg));
5853 match(AddP (AddP (DecodeN reg) lreg) off);
5854
5855 op_cost(10);
5856 format %{"[$reg + $off + $lreg]" %}
5857 interface(MEMORY_INTER) %{
5858 base($reg);
5859 index($lreg);
5860 scale(0x0);
5861 disp($off);
5862 %}
5863 %}
5864
5865 // Indirect Memory Plus Index Register Operand
// Narrow-oop variant of indIndex: matches (AddP (DecodeN reg) lreg) with a
// zero displacement (this form has no offset). Enabled only when
// CompressedOops::shift() == 0. op_cost is 10.
5866 operand indIndexNarrow(rRegN reg, rRegL lreg)
5867 %{
5868 predicate(CompressedOops::shift() == 0);
5869 constraint(ALLOC_IN_RC(ptr_reg));
5870 match(AddP (DecodeN reg) lreg);
5871
5872 op_cost(10);
5873 format %{"[$reg + $lreg]" %}
5874 interface(MEMORY_INTER) %{
5875 base($reg);
5876 index($lreg);
5877 scale(0x0);
5878 disp(0x0);
5879 %}
5880 %}
5881
5882 // Indirect Memory Times Scale Plus Index Register
// Narrow-oop variant of indIndexScale: matches
// (AddP (DecodeN reg) (LShiftL lreg scale)). Enabled only when
// CompressedOops::shift() == 0. op_cost is 10.
5883 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
5884 %{
5885 predicate(CompressedOops::shift() == 0);
5886 constraint(ALLOC_IN_RC(ptr_reg));
5887 match(AddP (DecodeN reg) (LShiftL lreg scale));
5888
5889 op_cost(10);
5890 format %{"[$reg + $lreg << $scale]" %}
5891 interface(MEMORY_INTER) %{
5892 base($reg);
5893 index($lreg);
5894 scale($scale);
5895 disp(0x0);
5896 %}
5897 %}
5898
5899 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Narrow-oop variant of indIndexScaleOffset: matches
// (AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off). Enabled only when
// CompressedOops::shift() == 0. op_cost is 10.
5900 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
5901 %{
5902 predicate(CompressedOops::shift() == 0);
5903 constraint(ALLOC_IN_RC(ptr_reg));
5904 match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
5905
5906 op_cost(10);
5907 format %{"[$reg + $off + $lreg << $scale]" %}
5908 interface(MEMORY_INTER) %{
5909 base($reg);
5910 index($lreg);
5911 scale($scale);
5912 disp($off);
5913 %}
5914 %}
5915
5916 // Indirect Memory Plus Positive Index Register Plus Offset Operand
// Narrow-oop variant of indPosIndexOffset: matches
// (AddP (AddP (DecodeN reg) (ConvI2L idx)) off). The predicate requires
// CompressedOops::shift() == 0 and the long type of n->in(2)->in(3)
// (presumably the ConvI2L node) to have a lower bound _lo >= 0.
5917 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
5918 %{
5919 constraint(ALLOC_IN_RC(ptr_reg));
5920 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
5921 match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
5922
5923 op_cost(10);
5924 format %{"[$reg + $off + $idx]" %}
5925 interface(MEMORY_INTER) %{
5926 base($reg);
5927 index($idx);
5928 scale(0x0);
5929 disp($off);
5930 %}
5931 %}
5932
5933 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Narrow-oop variant of indPosIndexScaleOffset: matches
// (AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off). The predicate
// requires CompressedOops::shift() == 0 and the long type of
// n->in(2)->in(3)->in(1) (presumably the ConvI2L node) to have _lo >= 0.
5934 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
5935 %{
5936 constraint(ALLOC_IN_RC(ptr_reg));
5937 predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
5938 match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
5939
5940 op_cost(10);
5941 format %{"[$reg + $off + $idx << $scale]" %}
5942 interface(MEMORY_INTER) %{
5943 base($reg);
5944 index($idx);
5945 scale($scale);
5946 disp($off);
5947 %}
5948 %}
5949
5950 //----------Special Memory Operands--------------------------------------------
5951 // Stack Slot Operand - This operand is used for loading and storing temporary
5952 // values on the stack where a match requires a value to
5953 // flow through memory.
// Pointer stack slot: allocated in register class stack_slots and addressed
// as RSP plus the slot offset carried by $reg.
5954 operand stackSlotP(sRegP reg)
5955 %{
5956 constraint(ALLOC_IN_RC(stack_slots));
5957 // No match rule because this operand is only generated in matching
5958
5959 format %{ "[$reg]" %}
5960 interface(MEMORY_INTER) %{
5961 base(0x4); // RSP
5962 index(0x4); // No Index
5963 scale(0x0); // No Scale
5964 disp($reg); // Stack Offset
5965 %}
5966 %}
5967
// Int stack slot: allocated in register class stack_slots and addressed as
// RSP plus the slot offset carried by $reg.
5968 operand stackSlotI(sRegI reg)
5969 %{
5970 constraint(ALLOC_IN_RC(stack_slots));
5971 // No match rule because this operand is only generated in matching
5972
5973 format %{ "[$reg]" %}
5974 interface(MEMORY_INTER) %{
5975 base(0x4); // RSP
5976 index(0x4); // No Index
5977 scale(0x0); // No Scale
5978 disp($reg); // Stack Offset
5979 %}
5980 %}
5981
// Float stack slot: allocated in register class stack_slots and addressed as
// RSP plus the slot offset carried by $reg.
5982 operand stackSlotF(sRegF reg)
5983 %{
5984 constraint(ALLOC_IN_RC(stack_slots));
5985 // No match rule because this operand is only generated in matching
5986
5987 format %{ "[$reg]" %}
5988 interface(MEMORY_INTER) %{
5989 base(0x4); // RSP
5990 index(0x4); // No Index
5991 scale(0x0); // No Scale
5992 disp($reg); // Stack Offset
5993 %}
5994 %}
5995
// Double stack slot: allocated in register class stack_slots and addressed as
// RSP plus the slot offset carried by $reg.
5996 operand stackSlotD(sRegD reg)
5997 %{
5998 constraint(ALLOC_IN_RC(stack_slots));
5999 // No match rule because this operand is only generated in matching
6000
6001 format %{ "[$reg]" %}
6002 interface(MEMORY_INTER) %{
6003 base(0x4); // RSP
6004 index(0x4); // No Index
6005 scale(0x0); // No Scale
6006 disp($reg); // Stack Offset
6007 %}
6008 %}
// Long stack slot: allocated in register class stack_slots and addressed as
// RSP plus the slot offset carried by $reg.
6009 operand stackSlotL(sRegL reg)
6010 %{
6011 constraint(ALLOC_IN_RC(stack_slots));
6012 // No match rule because this operand is only generated in matching
6013
6014 format %{ "[$reg]" %}
6015 interface(MEMORY_INTER) %{
6016 base(0x4); // RSP
6017 index(0x4); // No Index
6018 scale(0x0); // No Scale
6019 disp($reg); // Stack Offset
6020 %}
6021 %}
6022
6023 //----------Conditional Branch Operands----------------------------------------
6024 // Comparison Op - This is the operation of the comparison, and is limited to
6025 // the following set of codes:
6026 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6027 //
6028 // Other attributes of the comparison, such as unsignedness, are specified
6029 // by the comparison instruction that sets a condition code flags register.
6030 // That result is represented by a flags operand whose subtype is appropriate
6031 // to the unsignedness (etc.) of the comparison.
6032 //
6033 // Later, the instruction which matches both the Comparison Op (a Bool) and
6034 // the flags (produced by the Cmp) specifies the coding of the comparison op
6035 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6036
6037 // Comparison Code
// Matches any Bool. Each COND_INTER entry pairs a condition encoding with the
// mnemonic suffix used when printing; the ordering tests here use the signed
// mnemonics l/ge/le/g.
6038 operand cmpOp()
6039 %{
6040 match(Bool);
6041
6042 format %{ "" %}
6043 interface(COND_INTER) %{
6044 equal(0x4, "e");
6045 not_equal(0x5, "ne");
6046 less(0xc, "l");
6047 greater_equal(0xd, "ge");
6048 less_equal(0xe, "le");
6049 greater(0xf, "g");
6050 overflow(0x0, "o");
6051 no_overflow(0x1, "no");
6052 %}
6053 %}
6054
6055 // Comparison Code, unsigned compare. Used by FP also, with
6056 // C2 (unordered) turned into GT or LT already. The other bits
6057 // C0 and C3 are turned into Carry & Zero flags.
// Matches any Bool; the ordering tests use the unsigned mnemonics b/ae/be/a.
6058 operand cmpOpU()
6059 %{
6060 match(Bool);
6061
6062 format %{ "" %}
6063 interface(COND_INTER) %{
6064 equal(0x4, "e");
6065 not_equal(0x5, "ne");
6066 less(0x2, "b");
6067 greater_equal(0x3, "ae");
6068 less_equal(0x6, "be");
6069 greater(0x7, "a");
6070 overflow(0x0, "o");
6071 no_overflow(0x1, "no");
6072 %}
6073 %}
6074
6075
6076 // Floating comparisons that don't require any fixup for the unordered case,
6077 // If both inputs of the comparison are the same, ZF is always set so we
6078 // don't need to use cmpOpUCF2 for eq/ne
// Enabled only when UseAPX is off or AVX10.2 is unsupported. Accepts a Bool
// whose test is lt/ge/le/gt, or any test when both compare inputs are the
// same node. Note that equal/not_equal are encoded as "np"/"p" here, not
// "e"/"ne".
6079 operand cmpOpUCF() %{
6080 match(Bool);
6081 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6082 (n->as_Bool()->_test._test == BoolTest::lt ||
6083 n->as_Bool()->_test._test == BoolTest::ge ||
6084 n->as_Bool()->_test._test == BoolTest::le ||
6085 n->as_Bool()->_test._test == BoolTest::gt ||
6086 n->in(1)->in(1) == n->in(1)->in(2)));
6087 format %{ "" %}
6088 interface(COND_INTER) %{
6089 equal(0xb, "np");
6090 not_equal(0xa, "p");
6091 less(0x2, "b");
6092 greater_equal(0x3, "ae");
6093 less_equal(0x6, "be");
6094 greater(0x7, "a");
6095 overflow(0x0, "o");
6096 no_overflow(0x1, "no");
6097 %}
6098 %}
6099
6100
6101 // Floating comparisons that can be fixed up with extra conditional jumps
// Enabled only when UseAPX is off or AVX10.2 is unsupported. Accepts a Bool
// whose test is ne or eq and whose compare inputs are two different nodes
// (the case cmpOpUCF's predicate excludes).
6102 operand cmpOpUCF2() %{
6103 match(Bool);
6104 predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
6105 (n->as_Bool()->_test._test == BoolTest::ne ||
6106 n->as_Bool()->_test._test == BoolTest::eq) &&
6107 n->in(1)->in(1) != n->in(1)->in(2));
6108 format %{ "" %}
6109 interface(COND_INTER) %{
6110 equal(0x4, "e");
6111 not_equal(0x5, "ne");
6112 less(0x2, "b");
6113 greater_equal(0x3, "ae");
6114 less_equal(0x6, "be");
6115 greater(0x7, "a");
6116 overflow(0x0, "o");
6117 no_overflow(0x1, "no");
6118 %}
6119 %}
6120
6121
6122 // Floating point comparisons that set condition flags to test more directly,
6123 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
6124 // are used for L (<) and LE (<=) conditions. It's important to convert these
6125 // latter conditions to ones that use unsigned tests before passing into an
6126 // instruction because the preceding comparison might be based on a three way
6127 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
// Enabled only when UseAPX is on and AVX10.2 is supported. Accepts a Bool
// whose test is one of ne/eq/lt/ge/le/gt. All four ordering entries below use
// the unsigned mnemonics b/ae/be/a.
6128 operand cmpOpUCFE()
6129 %{
6130 match(Bool);
6131 predicate((UseAPX && VM_Version::supports_avx10_2()) &&
6132 (n->as_Bool()->_test._test == BoolTest::ne ||
6133 n->as_Bool()->_test._test == BoolTest::eq ||
6134 n->as_Bool()->_test._test == BoolTest::lt ||
6135 n->as_Bool()->_test._test == BoolTest::ge ||
6136 n->as_Bool()->_test._test == BoolTest::le ||
6137 n->as_Bool()->_test._test == BoolTest::gt));
6138
6139 format %{ "" %}
6140 interface(COND_INTER) %{
6141 equal(0x4, "e");
6142 not_equal(0x5, "ne");
6143 less(0x2, "b");
6144 greater_equal(0x3, "ae");
6145 less_equal(0x6, "be");
6146 greater(0x7, "a");
6147 overflow(0x0, "o");
6148 no_overflow(0x1, "no");
6149 %}
6150 %}
6151
6152 // Operands for bound floating point register arguments
// Matches VecX and is constrained to register class xmm0_reg.
6153 operand rxmm0() %{
6154 constraint(ALLOC_IN_RC(xmm0_reg));
6155 match(VecX);
6156 format%{%}
6157 interface(REG_INTER);
6158 %}
6159
6160 // Vectors
6161
6162 // Dummy generic vector class. Should be used for all vector operands.
6163 // Replaced with vec[SDXYZ] during post-selection pass.
// Matches every vector ideal type (VecS, VecD, VecX, VecY, VecZ) and uses the
// "dynamic" register-class constraint.
6164 operand vec() %{
6165 constraint(ALLOC_IN_RC(dynamic));
6166 match(VecX);
6167 match(VecY);
6168 match(VecZ);
6169 match(VecS);
6170 match(VecD);
6171
6172 format %{ %}
6173 interface(REG_INTER);
6174 %}
6175
6176 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
6177 // Replaced with legVec[SDXYZ] during post-selection cleanup.
6178 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
6179 // runtime code generation via reg_class_dynamic.
// NOTE(review): the note above mentions a legacy register class, but the
// constraint below is ALLOC_IN_RC(dynamic), identical to vec -- confirm the
// note is still current.
6180 operand legVec() %{
6181 constraint(ALLOC_IN_RC(dynamic));
6182 match(VecX);
6183 match(VecY);
6184 match(VecZ);
6185 match(VecS);
6186 match(VecD);
6187
6188 format %{ %}
6189 interface(REG_INTER);
6190 %}
6191
6192 // Replaces vec during post-selection cleanup. See above.
// Matches VecS; allocated from register class vectors_reg_vlbwdq.
6193 operand vecS() %{
6194 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
6195 match(VecS);
6196
6197 format %{ %}
6198 interface(REG_INTER);
6199 %}
6200
6201 // Replaces legVec during post-selection cleanup. See above.
// Matches VecS; allocated from register class vectors_reg_legacy.
6202 operand legVecS() %{
6203 constraint(ALLOC_IN_RC(vectors_reg_legacy));
6204 match(VecS);
6205
6206 format %{ %}
6207 interface(REG_INTER);
6208 %}
6209
6210 // Replaces vec during post-selection cleanup. See above.
// Matches VecD; allocated from register class vectord_reg_vlbwdq.
6211 operand vecD() %{
6212 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
6213 match(VecD);
6214
6215 format %{ %}
6216 interface(REG_INTER);
6217 %}
6218
6219 // Replaces legVec during post-selection cleanup. See above.
// Matches VecD; allocated from register class vectord_reg_legacy.
6220 operand legVecD() %{
6221 constraint(ALLOC_IN_RC(vectord_reg_legacy));
6222 match(VecD);
6223
6224 format %{ %}
6225 interface(REG_INTER);
6226 %}
6227
6228 // Replaces vec during post-selection cleanup. See above.
// Matches VecX; allocated from register class vectorx_reg_vlbwdq.
6229 operand vecX() %{
6230 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
6231 match(VecX);
6232
6233 format %{ %}
6234 interface(REG_INTER);
6235 %}
6236
6237 // Replaces legVec during post-selection cleanup. See above.
// Matches VecX; allocated from register class vectorx_reg_legacy.
6238 operand legVecX() %{
6239 constraint(ALLOC_IN_RC(vectorx_reg_legacy));
6240 match(VecX);
6241
6242 format %{ %}
6243 interface(REG_INTER);
6244 %}
6245
6246 // Replaces vec during post-selection cleanup. See above.
// Matches VecY; allocated from register class vectory_reg_vlbwdq.
6247 operand vecY() %{
6248 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
6249 match(VecY);
6250
6251 format %{ %}
6252 interface(REG_INTER);
6253 %}
6254
6255 // Replaces legVec during post-selection cleanup. See above.
// Matches VecY; allocated from register class vectory_reg_legacy.
6256 operand legVecY() %{
6257 constraint(ALLOC_IN_RC(vectory_reg_legacy));
6258 match(VecY);
6259
6260 format %{ %}
6261 interface(REG_INTER);
6262 %}
6263
6264 // Replaces vec during post-selection cleanup. See above.
// Matches VecZ; allocated from register class vectorz_reg.
6265 operand vecZ() %{
6266 constraint(ALLOC_IN_RC(vectorz_reg));
6267 match(VecZ);
6268
6269 format %{ %}
6270 interface(REG_INTER);
6271 %}
6272
6273 // Replaces legVec during post-selection cleanup. See above.
// Matches VecZ; allocated from register class vectorz_reg_legacy.
6274 operand legVecZ() %{
6275 constraint(ALLOC_IN_RC(vectorz_reg_legacy));
6276 match(VecZ);
6277
6278 format %{ %}
6279 interface(REG_INTER);
6280 %}
6281
6282 //----------OPERAND CLASSES----------------------------------------------------
6283 // Operand Classes are groups of operands that are used to simplify
6284 // instruction definitions by not requiring the AD writer to specify separate
6285 // instructions for every form of operand when the instruction accepts
6286 // multiple operand types with the same basic encoding and format. The classic
6287 // case of this is memory operands.
6288
// The "memory" operand class: every addressing-mode operand defined above,
// i.e. the plain pointer forms, the compressed-oop form and the *Narrow
// forms. Pipeline classes and instructions can take "memory" to accept any
// of them.
6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
6290 indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
6291 indCompressedOopOffset,
6292 indirectNarrow, indOffset8Narrow, indOffset32Narrow,
6293 indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
6294 indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
6295
6296 //----------PIPELINE-----------------------------------------------------------
6297 // Rules which define the behavior of the target architecture's pipeline.
6298 pipeline %{
6299
6300 //----------ATTRIBUTES---------------------------------------------------------
// Global pipeline attributes for this architecture description.
6301 attributes %{
6302 variable_size_instructions; // Instructions are variable-sized
6303 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle
6304 instruction_unit_size = 1; // Instruction size is measured in units of 1 byte
6305 instruction_fetch_unit_size = 16; // The processor fetches one line
6306 instruction_fetch_units = 1; // of 16 bytes
6307 %}
6308
6309 //----------RESOURCES----------------------------------------------------------
6310 // Resources are the functional units available to the machine
6311
6312 // Generic P2/P3 pipeline
6313 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
6314 // 3 instructions decoded per cycle.
6315 // 2 load/store ops per cycle, 1 branch, 1 FPU,
6316 // 3 ALU op, only ALU0 handles mul instructions.
// DECODE, MEM and ALU are masks that mean "any one of" their member units.
// NOTE(review): the text above says 2 load/store ops per cycle, but three
// memory units (MS0, MS1, MS2) are declared below -- confirm which is intended.
6317 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
6318 MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
6319 BR, FPU,
6320 ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
6321
6322 //----------PIPELINE DESCRIPTION-----------------------------------------------
6323 // Pipeline Description specifies the stages in the machine's pipeline
6324
6325 // Generic P2/P3 pipeline
// Six stages, S0 through S5; the pipe_class definitions below refer to them.
6326 pipe_desc(S0, S1, S2, S3, S4, S5);
6327
6328 //----------PIPELINE CLASSES---------------------------------------------------
6329 // Pipeline Classes describe the stages in which input and output are
6330 // referenced by the hardware pipeline.
6331
6332 // Naming convention: ialu or fpu
6333 // Then: _reg
6334 // Then: _reg if there is a 2nd register
6335 // Then: _long if it's a pair of instructions implementing a long
6336 // Then: _fat if it requires the big decoder
6337 // Or: _mem if it requires the big decoder and a memory unit.
6338
6339 // Integer ALU reg operation
// Single instruction: dst is read in S3 and written in S4.
6340 pipe_class ialu_reg(rRegI dst)
6341 %{
6342 single_instruction;
6343 dst : S4(write);
6344 dst : S3(read);
6345 DECODE : S0; // any decoder
6346 ALU : S3; // any alu
6347 %}
6348
6349 // Long ALU reg operation
// Counted as two instructions: dst is read in S3 and written in S4, using two
// decoders in S0 and two ALUs in S3.
6350 pipe_class ialu_reg_long(rRegL dst)
6351 %{
6352 instruction_count(2);
6353 dst : S4(write);
6354 dst : S3(read);
6355 DECODE : S0(2); // any 2 decoders
6356 ALU : S3(2); // any 2 alus
6357 %}
6358
6359 // Integer ALU reg operation using big decoder
// Same timing as ialu_reg, but decoding is restricted to D0.
6360 pipe_class ialu_reg_fat(rRegI dst)
6361 %{
6362 single_instruction;
6363 dst : S4(write);
6364 dst : S3(read);
6365 D0 : S0; // big decoder only
6366 ALU : S3; // any alu
6367 %}
6368
6369 // Integer ALU reg-reg operation
// Single instruction: src is read in S3 and dst is written in S4.
6370 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
6371 %{
6372 single_instruction;
6373 dst : S4(write);
6374 src : S3(read);
6375 DECODE : S0; // any decoder
6376 ALU : S3; // any alu
6377 %}
6378
6379 // Integer ALU reg-reg operation using big decoder
// Same timing as ialu_reg_reg, but decoding is restricted to D0.
// NOTE(review): despite the "reg_reg" name, src is declared as a memory
// operand and no MEM unit is reserved -- confirm this is intentional.
6380 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
6381 %{
6382 single_instruction;
6383 dst : S4(write);
6384 src : S3(read);
6385 D0 : S0; // big decoder only
6386 ALU : S3; // any alu
6387 %}
6388
6389 // Integer ALU reg-mem operation
// Single instruction: mem is read in S3 using a memory unit, the ALU is used
// in S4 and dst is written in S5.
6390 pipe_class ialu_reg_mem(rRegI dst, memory mem)
6391 %{
6392 single_instruction;
6393 dst : S5(write);
6394 mem : S3(read);
6395 D0 : S0; // big decoder only
6396 ALU : S4; // any alu
6397 MEM : S3; // any mem
6398 %}
6399
6400 // Integer mem operation (prefetch)
// Single instruction that only reads mem in S3; no ALU is reserved.
6401 pipe_class ialu_mem(memory mem)
6402 %{
6403 single_instruction;
6404 mem : S3(read);
6405 D0 : S0; // big decoder only
6406 MEM : S3; // any mem
6407 %}
6408
6409 // Integer Store to Memory
// Single instruction: the mem operand is read in S3 and the src register in
// S5; uses a memory unit in S3 and an ALU in S4.
6410 pipe_class ialu_mem_reg(memory mem, rRegI src)
6411 %{
6412 single_instruction;
6413 mem : S3(read);
6414 src : S5(read);
6415 D0 : S0; // big decoder only
6416 ALU : S4; // any alu
6417 MEM : S3;
6418 %}
6419
6420 // // Long Store to Memory
6421 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
6422 // %{
6423 // instruction_count(2);
6424 // mem : S3(read);
6425 // src : S5(read);
6426 // D0 : S0(2); // big decoder only; twice
6427 // ALU : S4(2); // any 2 alus
6428 // MEM : S3(2); // Both mems
6429 // %}
6430
// Integer Store of an immediate to Memory: like ialu_mem_reg, but without a
// source register operand.
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6440
// Integer ALU0 reg-reg operation: same operand timing as ialu_reg_reg, but
// restricted to the big decoder and to ALU0.
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}
6450
// Integer ALU0 reg-mem operation: same operand timing as ialu_reg_mem, but
// the ALU stage is restricted to ALU0.
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}
6461
// Integer ALU reg-reg operation that produces flags: both sources are read in
// stage S3 and the flags register cr is written in stage S4.
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6472
// Integer ALU reg-imm operation that produces flags: src1 is read in stage S3
// and the flags register cr is written in stage S4.
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}
6482
// Integer ALU reg-mem operation that produces flags: the register and the
// memory source are read in stage S3 and cr is written in stage S4.
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}
6494
// Four-instruction register sequence (instruction_count(4)) over p, q and y.
// All three operands are modelled as reads only; no operand write is declared.
// NOTE(review): this class used to carry the comment "Conditional move
// reg-reg", identical to pipe_cmov_reg; the name suggests a compare/less-than
// idiom instead -- confirm against its users.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}
6504
// Conditional move reg-reg: src and the flags register are read in stage S3,
// dst is written in stage S4. No ALU resource is reserved.
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}
6514
// Conditional move reg-mem: the memory source and the flags register are read
// in stage S3, dst is written in stage S4.
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;        // any mem
%}
6525
// Conditional move reg-reg long: declared as a single instruction, but it
// reserves two decoder slots in stage S0.
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}
6535
// Float single-register operation: two instructions; dst is only modelled as
// a read in stage S3 (no write is declared).
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}
6544
// Float reg-reg operation: two instructions; src is read in stage S3 and dst
// is written in stage S4.
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}
6554
// Float reg-reg-reg operation: three instructions; both sources are read in
// stage S3 and dst is written in stage S4.
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}
6565
// Float operation with three register sources: four instructions; all sources
// are read in stage S3 and dst is written in stage S4.
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}
6577
// Float operation with one memory source (src1) and two register sources:
// four instructions; all sources are read in stage S3, dst is written in S4.
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;        // any mem
%}
6591
// Float reg-mem operation: two instructions; the memory operand is read in
// stage S3 and dst is written in stage S5.
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6603
// Float reg-reg-mem operation: three instructions; the register and memory
// sources are read in stage S3 and dst is written in stage S5.
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6616
// Float mem-reg operation: two instructions; the address operand is read in
// stage S3 and the source register in stage S5.
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6628
// Float mem-reg-reg operation: three instructions; the address operand and
// both register sources are read in stage S3.
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6640
// Float mem-reg-mem operation: three instructions; src1 and the memory source
// src2 are read in stage S3, the address operand mem in stage S4.
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
6652
// Float mem-mem operation: two instructions; src1 is read in stage S3 and the
// dst address operand in stage S4. No FPU resource is reserved.
pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}
6661
// Float mem-mem-mem operation: three instructions; both memory sources are
// read in stage S3 and the dst address operand in stage S4.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}
6672
// Float mem-reg operation with a constant: three instructions; src1 and the
// address operand are both read in stage S4.
pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}
6683
// Float load constant: two instructions; dst is written in stage S5 and a MEM
// resource is reserved in stage S3 for the load.
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6694
// Float load constant combined with a register source: three instructions;
// src is read in stage S3 and dst is written in stage S5.
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6706
// Unconditional branch: a single instruction that uses the BR resource in
// stage S3. The label operand has no modelled read or write.
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR   : S3;
%}
6713
// Conditional branch: the flags register is read in stage S1 and the BR
// resource is used in stage S3.
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}
6721
// Allocation idiom: a serializing class (force_serialization) with a fixed
// latency of 6. heap_ptr is read in stage S3 and dst is written in stage S5.
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}
6735
// Generic big/slow expanded idiom: declared as 10 instructions spanning
// multiple bundles, serializing, with a fixed latency of 100. It takes no
// operands, so no operand reads or writes are modelled.
pipe_class pipe_slow()
%{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}
6744
// The real do-nothing guy: zero instructions and no resource usage.
// It is the class bound to MachNop in the define block below.
pipe_class empty()
%{
    instruction_count(0);
%}
6750
// Define the class for the Nop node: MachNop uses the zero-instruction
// pipe class `empty`.
define
%{
   MachNop = empty;
%}
6756
6757 %}
6758
6759 //----------INSTRUCTIONS-------------------------------------------------------
6760 //
6761 // match -- States which machine-independent subtree may be replaced
6762 // by this instruction.
6763 // ins_cost -- The estimated cost of this instruction is used by instruction
6764 // selection to identify a minimum cost tree of machine
6765 // instructions that matches a tree of machine-independent
6766 // instructions.
6767 // format -- A string providing the disassembly for this instruction.
6768 // The value of an instruction's operand may be inserted
6769 // by referring to it with a '$' prefix.
6770 // opcode -- Three instruction opcodes may be provided. These are referred
6771 // to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6773 // indicate the type of machine instruction, while secondary
6774 // and tertiary are often used for prefix options or addressing
6775 // modes.
6776 // ins_encode -- A list of encode classes with parameters. The encode class
6777 // name must have been defined in an 'enc_class' specification
6778 // in the encode section of the architecture description.
6779
6780 // ============================================================================
6781
// Matches the ideal Halt node. When the node is reachable, emits a stop
// carrying the halt reason string; otherwise nothing is emitted.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      // code_string() supplies the message text handed to stop().
      __ stop(__ code_string(_halt_reason));
    }
  %}
  ins_pipe(pipe_slow);
%}
6793
6794 // ============================================================================
6795
6796 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Dummy float move regF -> vlRegF (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6806
// Dummy float move regF -> legRegF (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6816
// Dummy float move vlRegF -> regF (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6826
// Dummy float move legRegF -> regF (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6836
// Dummy double move regD -> vlRegD (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6846
// Dummy double move regD -> legRegD (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6856
// Dummy double move vlRegD -> regD (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6866
// Dummy double move legRegD -> regD (see the section note above). The encoding
// only asserts that this node is never actually emitted.
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
6876
6877 //----------Load/Store/Move Instructions---------------------------------------
6878 //----------Load Instructions--------------------------------------------------
6879
// Load Byte (8 bit signed): matches LoadB and emits movsbl from memory into
// an int register.
instruct loadB(rRegI dst, memory mem)
%{
  match(Set dst (LoadB mem));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# byte" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movsbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6894
// Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB) into a
// single movsbq.
instruct loadB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  ins_cost(125);

  format %{ "movsbq $dst, $mem\t# byte -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movsbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6909
// Load Unsigned Byte (8 bit UNsigned): matches LoadUB and emits movzbl from
// memory into an int register.
instruct loadUB(rRegI dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  ins_cost(125);

  format %{ "movzbl $dst, $mem\t# ubyte" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movzbl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6924
// Load Unsigned Byte (8 bit UNsigned) into Long Register: folds
// ConvI2L(LoadUB) into a single movzbq.
instruct loadUB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  ins_cost(125);

  format %{ "movzbq $dst, $mem\t# ubyte -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movzbq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6939
// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register.
// The byte is loaded with movzbq and then and-ed with the low 8 bits of the
// mask constant; the andl is why the flags register is killed.
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_addr = $mem$$Address;
    __ movzbq(dst_reg, src_addr);
    // Only the low 8 mask bits can matter after a zero-extended byte load.
    __ andl(dst_reg, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
6954
// Load Short (16 bit signed): matches LoadS and emits movswl from memory into
// an int register.
instruct loadS(rRegI dst, memory mem)
%{
  match(Set dst (LoadS mem));
  ins_cost(125);

  format %{ "movswl $dst, $mem\t# short" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movswl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6969
// Load Short (16 bit signed) to Byte (8 bit signed): the shift-left-24 /
// shift-right-24 pair around LoadS is replaced by a single movsbl.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    Address src_addr = $mem$$Address;
    __ movsbl($dst$$Register, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
6981
// Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS) into a
// single movswq.
instruct loadS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  ins_cost(125);

  format %{ "movswq $dst, $mem\t# short -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movswq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6996
// Load Unsigned Short/Char (16 bit UNsigned): matches LoadUS and emits movzwl
// from memory into an int register.
instruct loadUS(rRegI dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  ins_cost(125);

  format %{ "movzwl $dst, $mem\t# ushort/char" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movzwl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7011
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): the
// shift-left-24 / shift-right-24 pair around LoadUS becomes a single movsbl.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    Address src_addr = $mem$$Address;
    __ movsbl($dst$$Register, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7023
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds
// ConvI2L(LoadUS) into a single movzwq.
instruct loadUS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  ins_cost(125);

  format %{ "movzwq $dst, $mem\t# ushort/char -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movzwq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7038
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long
// Register: the load plus mask is emitted as a single byte-wide movzbq.
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_addr = $mem$$Address;
    __ movzbq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7049
// Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long
// Register. The value is loaded with movzwq and then and-ed with the low 16
// bits of the mask constant; the andl is why the flags register is killed.
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "andl $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_addr = $mem$$Address;
    __ movzwq(dst_reg, src_addr);
    // Only the low 16 mask bits can matter after a zero-extended 16-bit load.
    __ andl(dst_reg, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
7064
// Load Integer: matches LoadI and emits a 32-bit movl from memory.
instruct loadI(rRegI dst, memory mem)
%{
  match(Set dst (LoadI mem));
  ins_cost(125);

  format %{ "movl $dst, $mem\t# int" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7079
// Load Integer (32 bit signed) to Byte (8 bit signed): the shift-left-24 /
// shift-right-24 pair around LoadI is replaced by a single movsbl.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
  ins_cost(125);

  format %{ "movsbl $dst, $mem\t# int -> byte" %}
  ins_encode %{
    Address src_addr = $mem$$Address;
    __ movsbl($dst$$Register, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7091
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): LoadI and-ed
// with 0xFF is emitted as a single movzbl.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(125);

  format %{ "movzbl $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    Address src_addr = $mem$$Address;
    __ movzbl($dst$$Register, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7103
// Load Integer (32 bit signed) to Short (16 bit signed): the shift-left-16 /
// shift-right-16 pair around LoadI is replaced by a single movswl.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
  ins_cost(125);

  format %{ "movswl $dst, $mem\t# int -> short" %}
  ins_encode %{
    Address src_addr = $mem$$Address;
    __ movswl($dst$$Register, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7115
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned):
// LoadI and-ed with 0xFFFF is emitted as a single movzwl.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(125);

  format %{ "movzwl $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    Address src_addr = $mem$$Address;
    __ movzwl($dst$$Register, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7127
// Load Integer into Long Register: folds ConvI2L(LoadI) into a single movslq.
instruct loadI2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  ins_cost(125);

  format %{ "movslq $dst, $mem\t# int -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movslq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7142
// Load Integer with mask 0xFF into Long Register: the load plus mask is
// emitted as a single byte-wide movzbq.
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_addr = $mem$$Address;
    __ movzbq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7153
// Load Integer with mask 0xFFFF into Long Register: the load plus mask is
// emitted as a single 16-bit-wide movzwq.
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_addr = $mem$$Address;
    __ movzwq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
7164
// Load Integer with a 31-bit mask into Long Register: a 32-bit movl followed
// by andl with the mask constant; the andl is why the flags register is killed.
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl $dst, $mask" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    Address src_addr = $mem$$Address;
    __ movl(dst_reg, src_addr);
    __ andl(dst_reg, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
7179
// Load Unsigned Integer into Long Register: ConvI2L(LoadI) and-ed with the
// 32-bit mask constant is emitted as a plain movl.
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  ins_cost(125);

  format %{ "movl $dst, $mem\t# uint -> long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7194
// Load Long: matches LoadL and emits a 64-bit movq from memory.
instruct loadL(rRegL dst, memory mem)
%{
  match(Set dst (LoadL mem));
  ins_cost(125);

  format %{ "movq $dst, $mem\t# long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7209
// Load Range: matches LoadRange and emits a 32-bit movl from memory.
instruct loadRange(rRegI dst, memory mem)
%{
  match(Set dst (LoadRange mem));
  ins_cost(125); // XXX

  format %{ "movl $dst, $mem\t# range" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7222
// Load Pointer: a 64-bit movq from memory. Only selected for loads whose
// barrier_data() is 0.
instruct loadP(rRegP dst, memory mem)
%{
  predicate(n->as_Load()->barrier_data() == 0);
  match(Set dst (LoadP mem));
  ins_cost(125); // XXX

  format %{ "movq $dst, $mem\t# ptr" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7236
// Load Compressed Pointer: a 32-bit movl from memory. Only selected for loads
// whose barrier_data() is 0.
instruct loadN(rRegN dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(n->as_Load()->barrier_data() == 0);
  ins_cost(125); // XXX

  format %{ "movl $dst, $mem\t# compressed ptr" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7250
7251
// Load Klass Pointer: matches LoadKlass and emits a 64-bit movq from memory.
instruct loadKlass(rRegP dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  ins_cost(125); // XXX

  format %{ "movq $dst, $mem\t# class" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7264
// Load narrow Klass Pointer: a 32-bit movl from memory. Used only when
// UseCompactObjectHeaders is off; loadNKlassCompactHeaders covers the other
// case.
instruct loadNKlass(rRegN dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!UseCompactObjectHeaders);
  ins_cost(125); // XXX

  format %{ "movl $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ movl(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}
7278
// Load narrow Klass Pointer when UseCompactObjectHeaders is on: the loaded
// 32-bit word is shifted right by markWord::klass_shift_at_offset. The shift
// is why the flags register is killed.
instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
%{
  predicate(UseCompactObjectHeaders);
  match(Set dst (LoadNKlass mem));
  effect(KILL cr);
  ins_cost(125);
  format %{
    "movl $dst, $mem\t# compressed klass ptr, shifted\n\t"
    "shrl $dst, markWord::klass_shift_at_offset"
  %}
  ins_encode %{
    if (!UseAPX) {
      // Two-instruction form: load, then shift in place.
      __ movl($dst$$Register, $mem$$Address);
      __ shrl($dst$$Register, markWord::klass_shift_at_offset);
    } else {
      // With APX a single eshrl takes the memory source and the shift count.
      __ eshrl($dst$$Register, $mem$$Address, markWord::klass_shift_at_offset, false);
    }
  %}
  ins_pipe(ialu_reg_mem);
%}
7300
// Load Float: matches LoadF and emits the load through the movflt macro.
instruct loadF(regF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  ins_cost(145); // XXX

  format %{ "movss $dst, $mem\t# float" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movflt(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7313
// Load Double, variant selected when UseXmmLoadAndClearUpper is off.
// The load is emitted through the movdbl macro; the format prints movlpd.
instruct loadD_partial(regD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!UseXmmLoadAndClearUpper);
  ins_cost(145); // XXX

  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7327
// Load Double, variant selected when UseXmmLoadAndClearUpper is on.
// The load is emitted through the movdbl macro; the format prints movsd.
instruct loadD(regD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(UseXmmLoadAndClearUpper);
  ins_cost(145); // XXX

  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdbl(dst_xmm, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
7340
// Materialize an AOT runtime constants address: the immediate operand is cast
// to an address and handed to load_aotrc_address together with dst.
instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
%{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ load_aotrc_address(dst_reg, (address)$con$$constant);
  %}
  ins_pipe(ialu_reg_fat);
%}
7353
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX10.2 variant for non-reduction nodes; one rule covers both MaxF and MinF.
instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
%{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));

  format %{ "minmaxF $dst, $a, $b" %}
  ins_encode %{
    // The ideal opcode (MaxF or MinF) is forwarded so the helper can pick
    // the operation.
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_FLOAT, $dst$$XMMRegister, k0,
                          $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7369
// Float min/max for reduction nodes on AVX10.2 hardware. Needs an integer
// temp register and kills the flags register.
instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // Both match rules share this encoding; the ideal opcode tells them apart.
    const bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7386
// min = java.lang.Math.min(float a, float b)
// max = java.lang.Math.max(float a, float b)
// AVX variant (UseAVX > 0, no AVX10.2) for non-reduction nodes; uses three
// XMM temporaries.
instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);

  format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    // Map the scalar ideal opcode onto Op_MinV / Op_MaxV, which is what is
    // passed to vminmax_fp.
    int vector_opcode = Op_MaxV;
    if (this->ideal_Opcode() == Op_MinF) {
      vector_opcode = Op_MinV;
    }
    __ vminmax_fp(vector_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7405
// Float min/max for reduction nodes on AVX hardware without AVX10.2. Needs an
// integer temp register and kills the flags register.
instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxF_reduction $dst, $a, $b \t!using $rtmp as TEMP" %}
  ins_encode %{
    // Both match rules share this encoding; the ideal opcode tells them apart.
    const bool is_min = (this->ideal_Opcode() == Op_MinF);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_flt /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7422
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX10.2 variant for non-reduction nodes; one rule covers both MaxD and MinD.
instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
%{
  predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));

  format %{ "minmaxD $dst, $a, $b" %}
  ins_encode %{
    // The ideal opcode (MaxD or MinD) is forwarded so the helper can pick
    // the operation.
    __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_DOUBLE, $dst$$XMMRegister, k0,
                          $a$$XMMRegister, $b$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
7438
// Double min/max for reduction nodes on AVX10.2 hardware. Needs an integer
// temp register and kills the flags register.
// NOTE(review): rtmp is declared rRegI here, whereas minmaxD_reduction_reg
// declares rRegL for the same fp_prec_dbl call -- confirm the narrower
// operand class is intended.
instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // Both match rules share this encoding; the ideal opcode tells them apart.
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7455
// min = java.lang.Math.min(double a, double b)
// max = java.lang.Math.max(double a, double b)
// AVX variant (UseAVX > 0, no AVX10.2) for non-reduction nodes; uses three
// XMM temporaries.
instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);

  format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    // Map the scalar ideal opcode onto Op_MinV / Op_MaxV, which is what is
    // passed to vminmax_fp.
    int vector_opcode = Op_MaxV;
    if (this->ideal_Opcode() == Op_MinD) {
      vector_opcode = Op_MinV;
    }
    __ vminmax_fp(vector_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister,
                  $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
7474
// Double min/max for reduction nodes on AVX hardware without AVX10.2. Needs a
// long temp register and kills the flags register.
instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxD a b));
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP rtmp, KILL cr);

  format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
  ins_encode %{
    // Both match rules share this encoding; the ideal opcode tells them apart.
    const bool is_min = (this->ideal_Opcode() == Op_MinD);
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $rtmp$$Register,
                    is_min, fp_prec_dbl /*pt*/);
  %}
  ins_pipe( pipe_slow );
%}
7491
// Load Effective Address for an indOffset8 operand: the address expression
// itself is matched and materialized with leaq.
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);
  ins_cost(110); // XXX

  format %{ "leaq $dst, $mem\t# ptr 8" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7504
// Load Effective Address for an indOffset32 operand, materialized with leaq.
instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);
  ins_cost(110);

  format %{ "leaq $dst, $mem\t# ptr 32" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7516
// Load Effective Address for an indIndexOffset operand, materialized with
// leaq.
instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);
  ins_cost(110);

  format %{ "leaq $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7528
// Load Effective Address for an indIndexScale operand, materialized with
// leaq.
instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);
  ins_cost(110);

  format %{ "leaq $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    __ leaq(dst_reg, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7540
// Load Effective Address for an indPosIndexScale operand, materialized with
// leaq. The format annotation says "posidxscale" so that printed assembly can
// be told apart from leaPIdxScale, matching the "posidx..." naming used by
// leaPPosIdxOff and leaPPosIdxScaleOff.
instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7552
// leaq of an indIndexScaleOffset memory operand into a pointer register.
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7564
// leaq of an indPosIndexOffset memory operand into a pointer register.
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7576
// leaq of an indPosIndexScaleOffset memory operand into a pointer register.
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7588
// Load Effective Address which uses Narrow (32-bits) oop
// Only selected when compressed oops are in use with a non-zero shift.
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) %{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7602
// leaq of an indOffset8Narrow operand; only selected when the compressed-oops
// shift is zero.
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7615
// leaq of an indOffset32Narrow operand; only selected when the compressed-oops
// shift is zero.
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7628
// leaq of an indIndexOffsetNarrow operand; only selected when the
// compressed-oops shift is zero.
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7641
// leaq of an indIndexScaleNarrow operand; only selected when the
// compressed-oops shift is zero.
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7654
// leaq of an indIndexScaleOffsetNarrow operand; only selected when the
// compressed-oops shift is zero.
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7667
// leaq of an indPosIndexOffsetNarrow operand; only selected when the
// compressed-oops shift is zero.
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7680
// leaq of an indPosIndexScaleOffsetNarrow operand; only selected when the
// compressed-oops shift is zero.
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) %{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ leaq(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}
7693
// Materialize an arbitrary int constant with a 32-bit immediate move.
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "movl $dst, $src\t# int" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7704
// Int zero: clear the register by xor-ing it with itself. The flags register
// is declared killed by this rule.
instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# int" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ xorl(target, target);
  %}
  ins_pipe(ialu_reg);
%}
7717
// Materialize an arbitrary long constant through mov64.
instruct loadConL(rRegL dst, immL src) %{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq $dst, $src\t# long" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ mov64(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7729
// Long zero: emitted as a 32-bit xor of the register with itself. The flags
// register is declared killed by this rule.
instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# long" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ xorl(target, target);
  %}
  ins_pipe(ialu_reg); // XXX
%}
7742
// Long constant matched by immUL32: loaded with a 32-bit movl.
instruct loadConUL32(rRegL dst, immUL32 src) %{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7754
// Long constant matched by immL32: loaded with movq and an immediate.
instruct loadConL32(rRegL dst, immL32 src) %{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7766
// Materialize a pointer constant with mov64, passing along the constant's
// relocation type and the RELOC_IMM64 format.
instruct loadConP(rRegP dst, immP con)
%{
  match(Set dst con);

  format %{ "movq $dst, $con\t# ptr" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ mov64(target, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7776
// Null pointer constant: clear the register with a 32-bit self-xor. The flags
// register is declared killed by this rule.
instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl $dst, $dst\t# ptr" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ xorl(target, target);
  %}
  ins_pipe(ialu_reg);
%}
7789
// Pointer constant matched by immP31: loaded with a 32-bit movl. The rule
// declares the flags register killed.
instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
7802
// Float constant: loaded from the constant table entry of con via movflt.
instruct loadConF(regF dst, immF con)
%{
  match(Set dst con);

  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movflt(target, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7812
// Half-float constant (immH): loaded from the constant table entry of con
// via movflt into a float register.
instruct loadConH(regF dst, immH con)
%{
  match(Set dst con);

  ins_cost(125);
  format %{ "movss $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movflt(target, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7822
// Compressed null pointer constant: the destination is cleared by xor-ing it
// with itself, which is why the flags register is declared killed. The format
// string prints dst for both operands so that it matches the emitted xorq.
instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq $dst, $dst\t# compressed null pointer" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7832
// Compressed oop constant: emitted through set_narrow_oop. A null constant is
// treated as unreachable by this rule.
instruct loadConN(rRegN dst, immN src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed ptr" %}
  ins_encode %{
    const address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    } else {
      // This rule must never be selected for a null constant.
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7848
// Compressed klass pointer constant: emitted through set_narrow_klass. A null
// constant is treated as unreachable by this rule.
instruct loadConNKlass(rRegN dst, immNKlass src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    const address klass_con = (address)$src$$constant;
    if (klass_con != nullptr) {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    } else {
      // This rule must never be selected for a null constant.
      ShouldNotReachHere();
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}
7864
// Float constant matched by immF0: produced by xor-ing the XMM register with
// itself.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps $dst, $dst\t# float 0.0" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ xorps(target, target);
  %}
  ins_pipe(pipe_slow);
%}
7876
// Use the same format since predicate() can not be used here.
// Double constant: loaded from the constant table entry of con via movdbl.
instruct loadConD(regD dst, immD con)
%{
  match(Set dst con);

  ins_cost(125);
  format %{ "movsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movdbl(target, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
7887
// Double constant matched by immD0: produced by xor-ing the XMM register with
// itself.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd $dst, $dst\t# double 0.0" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ xorpd(target, target);
  %}
  ins_pipe(pipe_slow);
%}
7899
// Reload an int from a stack slot into a general register.
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movl(target, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7911
// Reload a long from a stack slot into a general register.
instruct loadSSL(rRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movq(target, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7923
// Reload a pointer from a stack slot into a general register.
instruct loadSSP(rRegP dst, stackSlotP src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ movq(target, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
7935
// Reload a float from a stack slot; the slot is addressed as rsp plus the
// operand's displacement.
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movflt(target, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
7947
// Use the same format since predicate() can not be used here.
// Reload a double from a stack slot; the slot is addressed as rsp plus the
// operand's displacement.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    const XMMRegister target = $dst$$XMMRegister;
    __ movdbl(target, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}
7960
7961 // Prefetch instructions for allocation.
7962 // Must be safe to execute with invalid address (cannot fault).
7963
// Allocation prefetch using PREFETCHW; chosen when AllocatePrefetchInstr is 3.
instruct prefetchAlloc(memory mem)
%{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetchw(target);
  %}
  ins_pipe(ialu_mem);
%}
7975
// Allocation prefetch using PREFETCHNTA; chosen when AllocatePrefetchInstr is 0.
instruct prefetchAllocNTA(memory mem)
%{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetchnta(target);
  %}
  ins_pipe(ialu_mem);
%}
7987
// Allocation prefetch using PREFETCHT0; chosen when AllocatePrefetchInstr is 1.
instruct prefetchAllocT0(memory mem)
%{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetcht0(target);
  %}
  ins_pipe(ialu_mem);
%}
7999
// Allocation prefetch using PREFETCHT2; chosen when AllocatePrefetchInstr is 2.
instruct prefetchAllocT2(memory mem)
%{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ prefetcht2(target);
  %}
  ins_pipe(ialu_mem);
%}
8011
8012 //----------Store Instructions-------------------------------------------------
8013
// Store Byte
// Writes the low byte of an int register to memory with movb.
instruct storeB(memory mem, rRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movb($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8026
// Store Char/Short
// Writes a 16-bit value from an int register to memory with movw.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw $mem, $src\t# char/short" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movw($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8039
// Store Integer
// Writes an int register to memory with movl.
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8052
// Store Long
// Writes a long register to memory with movq.
instruct storeL(memory mem, rRegL src) %{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}
8065
// Store Pointer
// Plain movq of a pointer register; only selected when the store node carries
// no barrier data.
instruct storeP(memory mem, any_RegP src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8079
// Null pointer store done by writing r12. Selected only with compressed oops,
// a null compressed-oops base and no barrier data; the format annotation
// records the assumption that r12 (heap base) is zero in that configuration.
instruct storeImmP0(memory mem, immP0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8092
// Store Null Pointer, mark word, or other simple pointer constant.
// The constant is an immP31 written as a movq immediate; only selected when
// the store node carries no barrier data.
instruct storeImmP(memory mem, immP31 src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq $mem, $src\t# ptr" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8106
// Store Compressed Pointer
// 32-bit movl of a narrow-oop register; only selected when the store node
// carries no barrier data.
instruct storeN(memory mem, rRegN src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8120
// Store a compressed klass pointer held in a register with a 32-bit movl.
instruct storeNKlass(memory mem, rRegN src) %{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($mem$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8132
// Compressed null store done by writing the low 32 bits of r12. Selected only
// with a null compressed-oops base and no barrier data; the format annotation
// records the assumption that r12 (heap base) is zero in that configuration.
instruct storeImmN0(memory mem, immN0 zero) %{
  predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movl(target, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8145
// Store a compressed oop constant; only selected when the store node carries
// no barrier data. A null constant is written as a literal zero, anything
// else goes through set_narrow_oop.
instruct storeImmN(memory mem, immN src) %{
  predicate(n->as_Store()->barrier_data() == 0);
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed ptr" %}
  ins_encode %{
    const address oop_con = (address)$src$$constant;
    if (oop_con != nullptr) {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    } else {
      __ movl($mem$$Address, 0);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}
8163
// Store a compressed klass pointer constant through set_narrow_klass.
instruct storeImmNKlass(memory mem, immNKlass src) %{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ set_narrow_klass(target, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8175
// Store Integer Immediate
// Int zero stored by writing the low 32 bits of r12. Selected only with
// compressed oops and a null compressed-oops base; the format annotation
// records the assumption that r12 (heap base) is zero in that configuration.
instruct storeImmI0(memory mem, immI_0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movl(target, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8189
// Store an int immediate to memory with movl.
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl $mem, $src\t# int" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movl(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8201
// Store Long Immediate
// Long zero stored by writing r12. Selected only with compressed oops and a
// null compressed-oops base; the format annotation records the assumption
// that r12 (heap base) is zero in that configuration.
instruct storeImmL0(memory mem, immL0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8215
// Store a long constant matched by immL32 with a movq immediate.
instruct storeImmL(memory mem, immL32 src) %{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq $mem, $src\t# long" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8227
// Store Short/Char Immediate
// 16-bit zero stored by writing the low word of r12. Selected only with
// compressed oops and a null compressed-oops base; the format annotation
// records the assumption that r12 (heap base) is zero in that configuration.
instruct storeImmC0(memory mem, immI_0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movw(target, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8241
// Store a 16-bit immediate with movw; enabled by the UseStoreImmI16 flag.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw $mem, $src\t# short/char" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movw(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8254
// Store Byte Immediate
// Byte zero stored by writing the low byte of r12. Selected only with
// compressed oops and a null compressed-oops base; the format annotation
// records the assumption that r12 (heap base) is zero in that configuration.
// The format comment says "byte" (not "short/char") because this rule matches
// StoreB and emits movb.
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb $mem, R12\t# byte (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8268
// Store an 8-bit immediate to memory with movb.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb $mem, $src\t# byte" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movb(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8280
// Store Float
// Writes a float register to memory through movflt.
instruct storeF(memory mem, regF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss $mem, $src\t# float" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movflt($mem$$Address, value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8293
// Store immediate Float value (it is faster than store from XMM register)
// Float zero stored by writing the low 32 bits of r12. Selected only with
// compressed oops and a null compressed-oops base; the format annotation
// records the assumption that r12 (heap base) is zero in that configuration.
instruct storeF0(memory mem, immF0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movl(target, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8307
// Store a float constant as an integer immediate: the constant is passed
// through jint_cast and written with movl.
instruct storeF_imm(memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl $mem, $src\t# float" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movl(target, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}
8319
// Store Double
// Writes a double register to memory through movdbl.
instruct storeD(memory mem, regD src) %{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd $mem, $src\t# double" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movdbl($mem$$Address, value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8332
// Store immediate double 0.0 (it is faster than store from XMM register)
// Used when compressed oops are off or the compressed-oops base is non-null;
// its predicate is the logical complement of storeD0's.
instruct storeD0_imm(memory mem, immD0 src) %{
  predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq $mem, $src\t# double 0." %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
8346
// Double zero stored by writing r12. Selected only with compressed oops and a
// null compressed-oops base; the format annotation records the assumption
// that r12 (heap base) is zero in that configuration.
instruct storeD0(memory mem, immD0 zero) %{
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ movq(target, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}
8359
// Spill an int register to a stack slot.
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl $dst, $src\t# int stk" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movl($dst$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8371
// Spill a long register to a stack slot.
instruct storeSSL(stackSlotL dst, rRegL src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# long stk" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($dst$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8383
// Spill a pointer register to a stack slot.
instruct storeSSP(stackSlotP dst, rRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq $dst, $src\t# ptr stk" %}
  ins_encode %{
    const Register value = $src$$Register;
    __ movq($dst$$Address, value);
  %}
  ins_pipe(ialu_mem_reg);
%}
8395
// Spill a float register to a stack slot; the slot is addressed as rsp plus
// the operand's displacement.
instruct storeSSF(stackSlotF dst, regF src) %{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss $dst, $src\t# float stk" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movflt(Address(rsp, $dst$$disp), value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8407
// Spill a double register to a stack slot; the slot is addressed as rsp plus
// the operand's displacement.
instruct storeSSD(stackSlotD dst, regD src) %{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd $dst, $src\t# double stk" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    __ movdbl(Address(rsp, $dst$$disp), value);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8419
// Cache line write-back of the line addressed by a plain base register.
// Available only when the CPU supports data cache line flush. The encoding
// asserts that the operand has neither an index nor a displacement.
instruct cacheWB(indirect addr) %{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    const Register line_base = $addr$$base$$Register;
    __ cache_wb(Address(line_base, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}
8434
// Synchronization emitted for CacheWBPreSync: calls cache_wbsync with its
// argument set to true. Available only when the CPU supports data cache line
// flush.
instruct cacheWBPreSync() %{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    const bool is_pre_sync = true;
    __ cache_wbsync(is_pre_sync);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8447
// Synchronization emitted for CacheWBPostSync: calls cache_wbsync with its
// argument set to false. Available only when the CPU supports data cache line
// flush.
instruct cacheWBPostSync() %{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    const bool is_pre_sync = false;
    __ cache_wbsync(is_pre_sync);
  %}
  ins_pipe(pipe_slow); // XXX
%}
8460
8461 //----------BSWAP Instructions-------------------------------------------------
// ReverseBytesI performed in place with a single bswapl.
instruct bytes_reverse_int(rRegI dst)
%{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapl(value);
  %}
  ins_pipe(ialu_reg);
%}
8471
// ReverseBytesL performed in place with a single bswapq.
instruct bytes_reverse_long(rRegL dst)
%{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq $dst" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapq(value);
  %}
  ins_pipe(ialu_reg);
%}
8481
// ReverseBytesUS in place: swap all four bytes, then shift the result right
// logically by 16 so the upper half is zero-filled. Flags are declared killed.
instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr)
%{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "shrl $dst,16\n\t" %}
  ins_encode %{
    const Register value = $dst$$Register;
    __ bswapl(value);
    __ shrl(value, 16);
  %}
  ins_pipe(ialu_reg);
%}
8494
// ReverseBytesS in place: swap all four bytes, then shift the result right
// arithmetically by 16 so the sign of the swapped short is preserved. Flags
// are declared killed. The format names the shift "sarl" to match the emitted
// instruction and the "shrl" spelling used by the unsigned variant.
instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl $dst\n\t"
            "sarl $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
8507
8508 //---------- Zeros Count Instructions ------------------------------------------
8509
// CountLeadingZerosI via lzcntl, used when UseCountLeadingZerosInstruction is
// set. Flags are declared killed.
instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ lzcntl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
8521
// CountLeadingZerosI of a loaded int: the LoadI is folded into lzcntl's
// memory operand. Used when UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ lzcntl(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8533
// CountLeadingZerosI fallback built on bsrl, used when
// UseCountLeadingZerosInstruction is not set. The result is computed as
// 31 - dst, where dst is the bsrl result or -1 when the not-zero branch is
// not taken (so that path yields 32).
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 31" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    Label have_index;
    __ bsrl(result, input);
    __ jccb(Assembler::notZero, have_index);
    // Fall-through: substitute -1 for the bit index.
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerInt - 1) - result
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
8558
// CountLeadingZerosL via lzcntq, used when UseCountLeadingZerosInstruction is
// set. The result lands in an int register; flags are declared killed.
instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ lzcntq(result, input);
  %}
  ins_pipe(ialu_reg);
%}
8570
// CountLeadingZerosL of a loaded long: the LoadL is folded into lzcntq's
// memory operand. Used when UseCountLeadingZerosInstruction is set.
instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ lzcntq(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8582
// CountLeadingZerosL fallback built on bsrq, used when
// UseCountLeadingZerosInstruction is not set. The result is computed as
// 63 - dst, where dst is the bsrq result or -1 when the not-zero branch is
// not taken (so that path yields 64).
instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t"
            "jnz skip\n\t"
            "movl $dst, -1\n"
            "skip:\n\t"
            "negl $dst\n\t"
            "addl $dst, 63" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    Label have_index;
    __ bsrq(result, input);
    __ jccb(Assembler::notZero, have_index);
    // Fall-through: substitute -1 for the bit index.
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerLong - 1) - result
    __ negl(result);
    __ addl(result, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
8607
// CountTrailingZerosI via tzcntl, used when UseCountTrailingZerosInstruction
// is set. Flags are declared killed.
instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ tzcntl(result, input);
  %}
  ins_pipe(ialu_reg);
%}
8619
// CountTrailingZerosI of a loaded int: the LoadI is folded into tzcntl's
// memory operand. Used when UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ tzcntl(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8631
// CountTrailingZerosI fallback built on bsfl, used when
// UseCountTrailingZerosInstruction is not set. When the not-zero branch is
// not taken after bsfl, the result is replaced by BitsPerInt.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz done\n\t"
            "movl $dst, 32\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Label have_index;
    __ bsfl(result, $src$$Register);
    __ jccb(Assembler::notZero, have_index);
    // Fall-through: report the full bit width.
    __ movl(result, BitsPerInt);
    __ bind(have_index);
  %}
  ins_pipe(ialu_reg);
%}
8651
// CountTrailingZerosL via tzcntq, used when UseCountTrailingZerosInstruction
// is set. The result lands in an int register; flags are declared killed.
instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    __ tzcntq(result, input);
  %}
  ins_pipe(ialu_reg);
%}
8663
// CountTrailingZerosL of a loaded long: the LoadL is folded into tzcntq's
// memory operand. Used when UseCountTrailingZerosInstruction is set.
instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    const Register result = $dst$$Register;
    __ tzcntq(result, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
8675
// CountTrailingZerosL fallback built on bsfq, used when
// UseCountTrailingZerosInstruction is not set. When the not-zero branch is
// not taken after bsfq, the result is replaced by BitsPerLong.
instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz done\n\t"
            "movl $dst, 64\n"
            "done:" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Label have_index;
    __ bsfq(result, $src$$Register);
    __ jccb(Assembler::notZero, have_index);
    // Fall-through: report the full bit width.
    __ movl(result, BitsPerLong);
    __ bind(have_index);
  %}
  ins_pipe(ialu_reg);
%}
8695
8696 //--------------- Reverse Operation Instructions ----------------
// ReverseI for CPUs without GFNI: delegates to reverseI with no XMM
// temporaries (xnoreg) and one general-purpose temporary.
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    const Register scratch = $rtmp$$Register;
    __ reverseI(result, input, xnoreg, xnoreg, scratch);
  %}
  ins_pipe(ialu_reg);
%}
8707
// ReverseI for CPUs with GFNI: delegates to reverseI with two XMM temporaries
// and one general-purpose temporary.
instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    const Register scratch = $rtmp$$Register;
    __ reverseI(result, input, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, scratch);
  %}
  ins_pipe(ialu_reg);
%}
8718
// ReverseL for CPUs without GFNI: delegates to reverseL with no XMM
// temporaries (xnoreg) and two general-purpose temporaries.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input = $src$$Register;
    const Register scratch1 = $rtmp1$$Register;
    const Register scratch2 = $rtmp2$$Register;
    __ reverseL(result, input, xnoreg, xnoreg, scratch1, scratch2);
  %}
  ins_pipe(ialu_reg);
%}
8729
// ReverseL when GFNI is supported: calls the reverseL assembler routine with
// two XMM temporaries and one general-purpose temporary; the second
// general-purpose temporary is passed as noreg.
instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr)
%{
  match(Set dst (ReverseL src));
  predicate(VM_Version::supports_gfni());
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);

  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    Register    result   = $dst$$Register;
    Register    input    = $src$$Register;
    XMMRegister vec_tmp1 = $xtmp1$$XMMRegister;
    XMMRegister vec_tmp2 = $xtmp2$$XMMRegister;
    Register    gpr_tmp  = $rtmp$$Register;
    __ reverseL(result, input, vec_tmp1, vec_tmp2, gpr_tmp, noreg);
  %}
  ins_pipe(ialu_reg);
%}
8740
8741 //---------- Population Count Instructions -------------------------------------
8742
// PopCountI of a register, matched only when UsePopCountInstruction is set.
// Emits popcntl; the flags register is declared killed.
instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (PopCountI src));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    Register count = $dst$$Register;
    Register input = $src$$Register;
    __ popcntl(count, input);
  %}
  ins_pipe(ialu_reg);
%}
8754
// PopCountI folded with its LoadI: popcntl reads the operand directly from
// memory. Matched only when UsePopCountInstruction is set.
instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr)
%{
  match(Set dst (PopCountI (LoadI mem)));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    Register count = $dst$$Register;
    __ popcntl(count, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8766
// PopCountL of a register, matched only when UsePopCountInstruction is set.
// Note: Long.bitCount(long) returns an int, hence the rRegI destination.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (PopCountL src));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $src" %}
  ins_encode %{
    Register count = $dst$$Register;
    Register input = $src$$Register;
    __ popcntq(count, input);
  %}
  ins_pipe(ialu_reg);
%}
8779
// PopCountL folded with its LoadL: popcntq reads the operand directly from
// memory. Matched only when UsePopCountInstruction is set.
// Note: Long.bitCount(long) returns an int, hence the rRegI destination.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr)
%{
  match(Set dst (PopCountL (LoadL mem)));
  predicate(UsePopCountInstruction);
  effect(KILL cr);

  format %{ "popcnt $dst, $mem" %}
  ins_encode %{
    Register count = $dst$$Register;
    __ popcntq(count, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
8792
8793
8794 //----------MemBar Instructions-----------------------------------------------
8795 // Memory barrier flavors
8796
// Acquire barrier, matching both MemBarAcquire and LoadFence.
// Declared with size 0, cost 0 and an empty encoding, so no code is emitted.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8808
// MemBarAcquireLock: size 0, cost 0, empty encoding. Per the format string,
// the preceding CMPXCHG in FastLock makes an explicit barrier unnecessary.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8819
// Release barrier, matching both MemBarRelease and StoreFence.
// Declared with size 0, cost 0 and an empty encoding, so no code is emitted.
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8831
// MemBarReleaseLock: size 0, cost 0, empty encoding. Per the format string,
// the FastUnlock that follows makes an explicit barrier unnecessary.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8842
// MemBarStoreLoad: emitted through the assembler's membar(StoreLoad). The
// format template prints it as a locked add to the top of the stack; the
// flags register is declared killed.
instruct membar_storeload(rFlagsReg cr)
%{
  match(MemBarStoreLoad);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8857
// MemBarVolatile: emitted through the assembler's membar(StoreLoad). The
// format template prints it as a locked add to the top of the stack; the
// flags register is declared killed.
instruct membar_volatile(rFlagsReg cr)
%{
  match(MemBarVolatile);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8872
// Zero-cost MemBarVolatile variant, chosen when
// Matcher::post_store_load_barrier(n) holds. Size 0 with an empty encoding,
// so no code is emitted.
instruct unnecessary_membar_volatile() %{
  predicate(Matcher::post_store_load_barrier(n));
  match(MemBarVolatile);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8884
// MemBarFull: emitted through the assembler's membar(StoreLoad). The format
// template prints it as a locked add to the top of the stack; the flags
// register is declared killed.
instruct membar_full(rFlagsReg cr)
%{
  match(MemBarFull);
  ins_cost(400);
  effect(KILL cr);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
8899
// StoreStore barrier, matching both MemBarStoreStore and StoreStoreFence.
// Declared with size 0, cost 0 and an empty encoding, so no code is emitted.
instruct membar_storestore()
%{
  match(MemBarStoreStore);
  match(StoreStoreFence);

  size(0);
  ins_cost(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}
8910
8911 //----------Move Instructions--------------------------------------------------
8912
// CastX2P (long to pointer). A register move is emitted only when dst and src
// were assigned different registers; otherwise nothing is emitted.
instruct castX2P(rRegP dst, rRegL src) %{
  match(Set dst (CastX2P src));

  format %{ "movq $dst, $src\t# long->ptr" %}
  ins_encode %{
    const bool same_reg = ($dst$$reg == $src$$reg);
    if (!same_reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8925
// CastP2X (pointer to long). A register move is emitted only when dst and src
// were assigned different registers; otherwise nothing is emitted.
instruct castP2X(rRegL dst, rRegP src) %{
  match(Set dst (CastP2X src));

  format %{ "movq $dst, $src\t# ptr -> long" %}
  ins_encode %{
    const bool same_reg = ($dst$$reg == $src$$reg);
    if (!same_reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8938
// Convert an oop into an int (ConvL2I of CastP2X) with a single 32-bit move;
// used for vector alignment masking.
instruct convP2I(rRegI dst, rRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl $dst, $src\t# ptr -> int" %}
  ins_encode %{
    Register low_bits = $dst$$Register;
    Register pointer  = $src$$Register;
    __ movl(low_bits, pointer);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8950
// Convert a compressed oop into an int for vector alignment masking.
// Only applies when CompressedOops::shift() is zero (32-bit oops,
// heap < 4Gb): the DecodeN / CastP2X / ConvL2I chain becomes one 32-bit move.
instruct convN2I(rRegI dst, rRegN src) %{
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
  predicate(CompressedOops::shift() == 0);

  format %{ "movl $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    Register low_bits = $dst$$Register;
    Register narrow   = $src$$Register;
    __ movl(low_bits, narrow);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}
8964
// Convert an oop pointer into compressed form (EncodeP) when the node's
// pointer type is not TypePtr::NotNull. The source is first copied into dst
// if the registers differ, then encoded in place.
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  effect(KILL cr);

  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register wide   = $src$$Register;
    if (wide != narrow) {
      __ movq(narrow, wide);
    }
    __ encode_heap_oop(narrow);
  %}
  ins_pipe(ialu_reg_long);
%}
8981
// EncodeP when the node's pointer type is TypePtr::NotNull: uses the
// two-register encode_heap_oop_not_null routine.
instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (EncodeP src));
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  effect(KILL cr);

  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register narrow = $dst$$Register;
    Register wide   = $src$$Register;
    __ encode_heap_oop_not_null(narrow, wide);
  %}
  ins_pipe(ialu_reg_long);
%}
8992
// DecodeN when the node's pointer type is neither TypePtr::NotNull nor
// TypePtr::Constant. The source is first copied into dst if the registers
// differ, then decoded in place.
instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr)
%{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  effect(KILL cr);

  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register wide   = $dst$$Register;
    Register narrow = $src$$Register;
    if (narrow != wide) {
      __ movq(wide, narrow);
    }
    __ decode_heap_oop(wide);
  %}
  ins_pipe(ialu_reg_long);
%}
9009
// DecodeN when the node's pointer type is TypePtr::NotNull or
// TypePtr::Constant. Picks the one-register or two-register form of
// decode_heap_oop_not_null depending on whether dst and src coincide.
instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr)
%{
  match(Set dst (DecodeN src));
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  effect(KILL cr);

  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register wide   = $dst$$Register;
    Register narrow = $src$$Register;
    if (narrow == wide) {
      __ decode_heap_oop_not_null(wide);
    } else {
      __ decode_heap_oop_not_null(wide, narrow);
    }
  %}
  ins_pipe(ialu_reg_long);
%}
9027
// EncodePKlass: delegates to encode_and_move_klass_not_null. dst is declared
// TEMP and the flags register is declared killed.
instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);

  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    Register narrow_klass = $dst$$Register;
    Register klass        = $src$$Register;
    __ encode_and_move_klass_not_null(narrow_klass, klass);
  %}
  ins_pipe(ialu_reg_long);
%}
9037
// DecodeNKlass: delegates to decode_and_move_klass_not_null. dst is declared
// TEMP and the flags register is declared killed.
instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr)
%{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);

  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    Register klass        = $dst$$Register;
    Register narrow_klass = $src$$Register;
    __ decode_and_move_klass_not_null(klass, narrow_klass);
  %}
  ins_pipe(ialu_reg_long);
%}
9047
9048 //----------Conditional Move---------------------------------------------------
// Jump
// Dummy instruction for generating temp registers: the predicate is the
// constant false. The encoding loads the constant-table address into dest
// and jumps through [dest + switch_val << shift].
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest)
%{
  predicate(false);
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is not usable here: the macro assembler would need
    // r10 for it, and the compiler treats r10 as an allocatable register.
    // The table address is therefore materialized by hand.
    Register table_base = $dest$$Register;
    Register index      = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, (Address::ScaleFactor) $shift$$constant));
  %}
  ins_pipe(pipe_jmp);
%}
9071
// Jump on (switch_val << shift) + offset: loads the constant-table address
// into the TEMP dest and jumps through
// [dest + switch_val << shift + offset].
instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest)
%{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is not usable here: the macro assembler would need
    // r10 for it, and the compiler treats r10 as an allocatable register.
    // The table address is therefore materialized by hand.
    Register table_base = $dest$$Register;
    Register index      = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant));
  %}
  ins_pipe(pipe_jmp);
%}
9091
// Jump on an unscaled switch_val: loads the constant-table address into the
// TEMP dest and jumps through [dest + switch_val].
instruct jumpXtnd(rRegL switch_val, rRegI dest)
%{
  match(Jump switch_val);
  effect(TEMP dest);
  ins_cost(350);

  format %{ "leaq $dest, [$constantaddress]\n\t"
            "jmp [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // jump(ArrayAddress) is not usable here: the macro assembler would need
    // r10 for it, and the compiler treats r10 as an allocatable register.
    // The table address is therefore materialized by hand.
    Register table_base = $dest$$Register;
    Register index      = $switch_val$$Register;
    __ lea(table_base, $constantaddress);
    __ jmp(Address(table_base, index, Address::times_1));
  %}
  ins_pipe(pipe_jmp);
%}
9111
// Conditional move, int, signed flags, constant operands: src is immI_1 and
// the predicate requires n->in(2)->in(2) to be the constant 0. Emits setb on
// the negated condition instead of a cmov.
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9126
// CMoveI, register source, signed flags; two-operand cmovl used when UseAPX
// is off (dst is also the first data input of the match rule).
instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9139
// CMoveI, register sources, signed flags; used when UseAPX is on. ecmovl
// takes a separate destination in addition to both data inputs.
instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9152
// CMoveI, unsigned flags, constant operands: src is immI_1 and the predicate
// requires n->in(2)->in(2) to be the constant 0. Emits setb on the negated
// condition instead of a cmov.
instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9166
// CMoveI, register source, unsigned flags; two-operand cmovl used when
// UseAPX is off.
instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9178
// CMoveI, register sources, unsigned flags; used when UseAPX is on. ecmovl
// takes a separate destination in addition to both data inputs.
instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9190
// CMoveI for the rFlagsRegUCF / cmpOpUCF operand pair, constant operands:
// src is immI_1 and the predicate requires n->in(2)->in(2) to be the
// constant 0. Emits setb on the negated condition instead of a cmov.
instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9204
// CMoveI for the rFlagsRegUCFE / cmpOpUCFE operand pair, constant operands:
// src is immI_1 and the predicate requires n->in(2)->in(2) to be the
// constant 0. Emits setb on the negated condition instead of a cmov.
instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9218
// CMoveI for the rFlagsRegUCF / cmpOpUCF operand pair: has no encoding of
// its own and expands to cmovI_regU with the same operands.
instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  ins_cost(200);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}
9227
// CMoveI for the rFlagsRegUCFE / cmpOpUCFE operand pair: ecmovl with a
// separate destination and two register inputs. No predicate is attached.
instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2)
%{
  ins_cost(200);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));

  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9238
// CMoveI for cmpOpUCF2 when the Bool test is BoolTest::ne. Emits two
// conditional moves into dst: one on parity, one on notEqual.
// NOTE(review): parity presumably covers the unordered compare result - confirm.
instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register other  = $src$$Register;
    __ cmovl(Assembler::parity, target, other);
    __ cmovl(Assembler::notEqual, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9252
// CMoveI for cmpOpUCF2 when the Bool test is BoolTest::eq.
// Because (x == y) == !(x != y), the sense of the test is flipped by swapping
// the CMove inputs in the match rule (src, dst); the emitted code is the same
// parity / notEqual pair as the ne variant. dst is additionally declared TEMP.
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  effect(TEMP dst);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register other  = $src$$Register;
    __ cmovl(Assembler::parity, target, other);
    __ cmovl(Assembler::notEqual, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9269
// Conditional move, int, signed flags, with the second input folded from a
// LoadI; two-operand cmovl with a memory source, used when UseAPX is off.
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  predicate(!UseAPX);

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9282
// Conditional move, int, signed flags, with the second input folded from a
// LoadI; used when UseAPX is on. ecmovl takes a separate destination, a
// register input and a memory input.
instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
  predicate(UseAPX);

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9296
// Conditional move, int, unsigned flags, with the second input folded from a
// LoadI; two-operand cmovl with a memory source, used when UseAPX is off.
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  predicate(!UseAPX);

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9310
// CMoveI with a LoadI input for the rFlagsRegUCF / cmpOpUCF operand pair:
// has no encoding of its own and expands to cmovI_memU.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src)
%{
  ins_cost(250);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
9319
// CMoveI, unsigned flags, with the second input folded from a LoadI; used
// when UseAPX is on. ecmovl takes a separate destination, a register input
// and a memory input.
instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
  predicate(UseAPX);

  ins_cost(250);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9332
// CMoveI with a LoadI input for the rFlagsRegUCFE / cmpOpUCFE operand pair:
// ecmovl with a separate destination, a register input and a memory input.
// No predicate is attached.
instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2) %{
  ins_cost(250);
  match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));

  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9344
// Conditional move, compressed pointer, signed flags; two-operand cmovl used
// when UseAPX is off.
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9358
// Conditional move ndd, compressed pointer, signed flags; used when UseAPX
// is on. ecmovl takes a separate destination plus both data inputs.
instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9372
// Conditional move, compressed pointer, unsigned flags; two-operand cmovl
// used when UseAPX is off.
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovl(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9386
// CMoveN for the rFlagsRegUCF / cmpOpUCF operand pair: has no encoding of
// its own and expands to cmovN_regU with the same operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  ins_cost(200);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
9395
// Conditional move ndd, compressed pointer, unsigned flags; used when UseAPX
// is on. ecmovl takes a separate destination plus both data inputs.
instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9409
// CMoveN for the rFlagsRegUCFE / cmpOpUCFE operand pair: ecmovl with a
// separate destination and two register inputs. No predicate is attached.
instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2)
%{
  ins_cost(200);
  match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));

  format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovl(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9420
// CMoveN for cmpOpUCF2 when the Bool test is BoolTest::ne. Emits two
// conditional moves into dst: one on parity, one on notEqual.
// NOTE(review): parity presumably covers the unordered compare result - confirm.
instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register other  = $src$$Register;
    __ cmovl(Assembler::parity, target, other);
    __ cmovl(Assembler::notEqual, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9434
// CMoveN for cmpOpUCF2 when the Bool test is BoolTest::eq.
// Because (x == y) == !(x != y), the sense of the test is flipped by swapping
// the CMove inputs in the match rule (src, dst); the emitted code is the same
// parity / notEqual pair as the ne variant.
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);

  ins_cost(200); // XXX
  format %{ "cmovpl $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register other  = $src$$Register;
    __ cmovl(Assembler::parity, target, other);
    __ cmovl(Assembler::notEqual, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9450
// Conditional move, pointer, signed flags; two-operand cmovq used when
// UseAPX is off.
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9464
// Conditional move ndd, pointer, signed flags; used when UseAPX is on.
// ecmovq takes a separate destination plus both data inputs.
instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9478
// Conditional move, pointer, unsigned flags; two-operand cmovq used when
// UseAPX is off.
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9492
// Conditional move ndd, pointer, unsigned flags; used when UseAPX is on.
// ecmovq takes a separate destination plus both data inputs.
instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9506
// CMoveP for the rFlagsRegUCF / cmpOpUCF operand pair: has no encoding of
// its own and expands to cmovP_regU with the same operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  ins_cost(200);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
9515
// CMoveP for the rFlagsRegUCFE / cmpOpUCFE operand pair: ecmovq with a
// separate destination and two register inputs. No predicate is attached.
instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2)
%{
  ins_cost(200);
  match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));

  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9526
// CMoveP for cmpOpUCF2 when the Bool test is BoolTest::ne. Emits two 64-bit
// conditional moves into dst: one on parity, one on notEqual.
// NOTE(review): parity presumably covers the unordered compare result - confirm.
instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register other  = $src$$Register;
    __ cmovq(Assembler::parity, target, other);
    __ cmovq(Assembler::notEqual, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9540
// CMoveP for cmpOpUCF2 when the Bool test is BoolTest::eq.
// Because (x == y) == !(x != y), the sense of the test is flipped by swapping
// the CMove inputs in the match rule (src, dst); the emitted code is the same
// parity / notEqual pair as the ne variant.
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    Register target = $dst$$Register;
    Register other  = $src$$Register;
    __ cmovq(Assembler::parity, target, other);
    __ cmovq(Assembler::notEqual, target, other);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9556
// CMoveL, signed flags, constant operands: src is immL1 and the predicate
// requires n->in(2)->in(2) to be the constant 0. Emits setb on the negated
// condition instead of a cmov.
instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9570
// CMoveL, register source, signed flags; two-operand cmovq used when UseAPX
// is off.
instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9583
// CMoveL, register sources, signed flags; used when UseAPX is on. ecmovq
// takes a separate destination plus both data inputs.
instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9596
// CMoveL, signed flags, with the second input folded from a LoadL;
// two-operand cmovq with a memory source, used when UseAPX is off.
instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9609
// CMoveL, signed flags, with the second input folded from a LoadL; used when
// UseAPX is on. ecmovq takes a separate destination, a register input and a
// memory input.
instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9622
// CMoveL, unsigned flags, constant operands: src is immL1 and the predicate
// requires n->in(2)->in(2) to be the constant 0. Emits setb on the negated
// condition instead of a cmov.
instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9636
// CMoveL, register source, unsigned flags; two-operand cmovq used when
// UseAPX is off.
instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  predicate(!UseAPX);

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ cmovq(cc, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}
9649
// CMoveL, register sources, unsigned flags; used when UseAPX is on. ecmovq
// takes a separate destination plus both data inputs.
instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
  predicate(UseAPX);

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ ecmovq(cc, $dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9662
// CMoveL for the rFlagsRegUCF / cmpOpUCF operand pair, constant operands:
// src is immL1 and the predicate requires n->in(2)->in(2) to be the
// constant 0. Emits setb on the negated condition instead of a cmov.
instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9676
// CMoveL for the rFlagsRegUCFE / cmpOpUCFE operand pair, constant operands:
// src is immL1 and the predicate requires n->in(2)->in(2) to be the
// constant 0. Emits setb on the negated condition instead of a cmov.
instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
  ins_encode %{
    __ setb(MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode)), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
9690
// Long conditional move for the cmpOpUCF / rFlagsRegUCF operand pair. It has
// no encoding of its own: the rule expands to cmovL_regU with the same
// operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

ins_cost(200);
expand %{
cmovL_regU(cop, cr, dst, src);
%}
%}
9699
// Three-operand (ndd) long conditional move for the cmpOpUCFE / rFlagsRegUCFE
// operand pair; emits ecmovq with separate dst, src1 and src2 registers.
// NOTE(review): unlike cmovL_regU_ndd there is no UseAPX predicate here;
// presumably the UCFE operands are only produced under APX - confirm.
instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Register result = $dst$$Register;
    const Register first  = $src1$$Register;
    const Register second = $src2$$Register;
    __ ecmovq(cc, result, first, second);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9711
// Long conditional move for cmpOpUCF2 when the Bool test is ne. Two cmovq
// instructions are emitted so that src is copied into dst if either the
// parity condition or the not-equal condition holds.
// NOTE(review): parity presumably signals an unordered floating-point
// compare - confirm against the compare rules that feed rFlagsRegUCF.
instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9725
// Because (x == y) == !(x != y), the eq test is handled by matching the CMove
// with its two value inputs swapped (Binary src dst) and then emitting exactly
// the same parity / not-equal cmovq pair as the ne rule.
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    const Register target = $dst$$Register;
    const Register source = $src$$Register;
    __ cmovq(Assembler::parity, target, source);
    __ cmovq(Assembler::notEqual, target, source);
  %}
  ins_pipe(pipe_cmov_reg);
%}
9741
// Long conditional move whose second value input is a LoadL folded into the
// instruction as a memory operand. Unsigned condition; used only when UseAPX
// is off.
instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  predicate(!UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Register target = $dst$$Register;
    const Address  from   = $src$$Address;
    __ cmovq(cc, target, from);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}
9754
// Memory-source long conditional move for the cmpOpUCF / rFlagsRegUCF operand
// pair. It has no encoding of its own: the rule expands to cmovL_memU with the
// same operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

ins_cost(200);
expand %{
cmovL_memU(cop, cr, dst, src);
%}
%}
9763
// Three-operand (ndd) unsigned long conditional move with a register first
// input and a folded LoadL as the second input. Selected when UseAPX is on.
instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
%{
  predicate(UseAPX);
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Register result = $dst$$Register;
    const Register first  = $src1$$Register;
    const Address  second = $src2$$Address;
    __ ecmovq(cc, result, first, second);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9776
// Three-operand (ndd) long conditional move for the cmpOpUCFE / rFlagsRegUCFE
// operand pair, with a register first input and a folded LoadL second input.
instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));

  ins_cost(200);
  format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    const Register result = $dst$$Register;
    const Register first  = $src1$$Register;
    const Address  second = $src2$$Address;
    __ ecmovq(cc, result, first, second);
  %}
  ins_pipe(pipe_cmov_mem);
%}
9788
// Float conditional move under a signed condition. Implemented as a short
// branch around a register move: the branch uses the cmove condition code
// with its low bit flipped, so the move is skipped when the condition fails.
instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // Branch sense is the inverse of the CMOV sense.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label Ldone;
    __ jccb(skip_cc, Ldone);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}
9806
// Float conditional move under an unsigned condition (cmpOpU / rFlagsRegU).
// Implemented as a short branch, on the inverted condition, around a register
// move.
instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // Branch sense is the inverse of the CMOV sense.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label Ldone;
    __ jccb(skip_cc, Ldone);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}
9824
// Float conditional move for the cmpOpUCF / rFlagsRegUCF operand pair. It has
// no encoding of its own: the rule expands to cmovF_regU with the same
// operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

ins_cost(200);
expand %{
cmovF_regU(cop, cr, dst, src);
%}
%}
9833
// Float conditional move for the cmpOpUCFE / rFlagsRegUCFE operand pair.
// Implemented as a short branch, on the inverted condition, around a register
// move.
instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove float\n\t"
            "movss $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // Branch sense is the inverse of the CMOV sense.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label Ldone;
    __ jccb(skip_cc, Ldone);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}
9851
// Double conditional move under a signed condition. Implemented as a short
// branch, on the inverted condition, around a register move.
instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // Branch sense is the inverse of the CMOV sense.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label Ldone;
    __ jccb(skip_cc, Ldone);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}
9869
// Double conditional move under an unsigned condition (cmpOpU / rFlagsRegU).
// Implemented as a short branch, on the inverted condition, around a register
// move.
instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // Branch sense is the inverse of the CMOV sense.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label Ldone;
    __ jccb(skip_cc, Ldone);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}
9887
// Double conditional move for the cmpOpUCF / rFlagsRegUCF operand pair. It has
// no encoding of its own: the rule expands to cmovD_regU with the same
// operands.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

ins_cost(200);
expand %{
cmovD_regU(cop, cr, dst, src);
%}
%}
9896
// Double conditional move for the cmpOpUCFE / rFlagsRegUCFE operand pair.
// Implemented as a short branch, on the inverted condition, around a register
// move.
instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop skip\t# signed, unsigned cmove double\n\t"
            "movsd $dst, $src\n"
            "skip:" %}
  ins_encode %{
    // Branch sense is the inverse of the CMOV sense.
    const Assembler::Condition skip_cc = static_cast<Assembler::Condition>($cop$$cmpcode ^ 1);
    Label Ldone;
    __ jccb(skip_cc, Ldone);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}
9914
9915 //----------Arithmetic Instructions--------------------------------------------
9916 //----------Addition Instructions----------------------------------------------
9917
// Two-operand int add, register + register, used when UseAPX is off.
// dst is both an input and the result; the flags register is clobbered.
instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register sum    = $dst$$Register;
    const Register addend = $src$$Register;
    __ addl(sum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
9930
// Three-operand (ndd) int add, register + register, used when UseAPX is on.
// Clobbers the flags register; both source operands are flagged as
// ndd-demotable.
instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    // NOTE(review): the trailing false is presumably the no_flags argument.
    __ eaddl(sum, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
9944
// Two-operand int add of an immediate to a register, used when UseAPX is off.
// dst is both an input and the result; the flags register is clobbered.
instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    __ addl(sum, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
9958
// Three-operand (ndd) int add of an immediate to a register, used when UseAPX
// is on. Clobbers the flags register; the register source is flagged as
// ndd-demotable.
instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddl(sum, lhs, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
9972
// Three-operand (ndd) int add of an immediate to a value loaded from memory;
// the LoadI is folded into the instruction. Used when UseAPX is on and
// clobbers the flags register.
instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI (LoadI src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register sum  = $dst$$Register;
    const Address  from = $src1$$Address;
    __ eaddl(sum, from, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
9986
// Two-operand int add of a memory operand to a register (the LoadI is folded
// into the add). Used when UseAPX is off; clobbers the flags register.
instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Register sum  = $dst$$Register;
    const Address  from = $src$$Address;
    __ addl(sum, from);
  %}
  ins_pipe(ialu_reg_mem);
%}
10001
// Three-operand (ndd) int add of a register and a memory operand (folded
// LoadI). Used when UseAPX is on; clobbers the flags register.
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register sum  = $dst$$Register;
    const Register lhs  = $src1$$Register;
    const Address  from = $src2$$Address;
    __ eaddl(sum, lhs, from, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10016
// Read-modify-write int add: matches a StoreI of (LoadI dst + src) back to the
// same memory operand and emits a single add to memory. Clobbers the flags
// register.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Address  slot   = $dst$$Address;
    const Register addend = $src$$Register;
    __ addl(slot, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10030
// Read-modify-write int add of an immediate: matches a StoreI of
// (LoadI dst + src) back to the same memory operand and emits a single add to
// memory. Clobbers the flags register.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl $dst, $src\t# int" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ addl(slot, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10045
// Int add of the constant matched by immI_1, emitted through incrementl.
// Requires UseIncDec and is used only when UseAPX is off; clobbers the flags
// register.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
10058
// Two-register (ndd) int increment: dst = src + the constant matched by
// immI_1. Requires UseAPX and UseIncDec; clobbers the flags register.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ eincl(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10072
// (ndd) int increment of a value loaded from memory into a register: the LoadI
// is folded into eincl. Requires UseAPX and UseIncDec; clobbers the flags
// register.
instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  from   = $src$$Address;
    __ eincl(result, from, false);
  %}
  ins_pipe(ialu_reg);
%}
10085
// Read-modify-write int increment in memory: matches a StoreI of
// (LoadI dst + immI_1) back to the same memory operand. Requires UseIncDec;
// clobbers the flags register.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl $dst\t# int" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ incrementl(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10099
// Int decrement of a register, emitted through decrementl. The rule matches
// AddI with the immI_M1 operand rather than a SubI node.
// XXX presumably because the ideal graph represents x - 1 as x + (-1); confirm.
// Requires UseIncDec and is used only when UseAPX is off; clobbers the flags
// register.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl $dst\t# int" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementl(counter);
  %}
  ins_pipe(ialu_reg);
%}
10113
// Two-register (ndd) int decrement: dst = src + the constant matched by
// immI_M1. Requires UseAPX and UseIncDec; clobbers the flags register.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ edecl(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10127
// (ndd) int decrement of a value loaded from memory into a register: the LoadI
// is folded into edecl. Requires UseAPX and UseIncDec; clobbers the flags
// register.
instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "edecl $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  from   = $src$$Address;
    __ edecl(result, from, false);
  %}
  ins_pipe(ialu_reg);
%}
10140
// Read-modify-write int decrement in memory: matches a StoreI of
// (LoadI dst + immI_M1) back to the same memory operand. As with decI_rReg the
// rule is written in terms of AddI rather than SubI.
// XXX presumably because the ideal graph represents x - 1 as x + (-1); confirm.
// Requires UseIncDec; clobbers the flags register.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl $dst\t# int" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ decrementl(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10155
// Folds (index << scale) + disp into a single leal with no base register.
// Enabled when VM_Version::supports_fast_2op_lea() holds. No flags effect is
// declared for this rule.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift count doubles as the addressing-mode scale factor.
    const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leal(result, Address(noreg, $index$$Register, shift, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10168
// Folds (base + index) + disp into a single leal with an unscaled index.
// Enabled when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register first  = $base$$Register;
    const Register second = $index$$Register;
    __ leal(result, Address(first, second, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10180
// Folds base + (index << scale) into a single leal without displacement.
// The base operand is restricted to the no_rbp_r13_RegI class. Enabled when
// VM_Version::supports_fast_2op_lea() holds.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift count doubles as the addressing-mode scale factor.
    const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leal(result, Address($base$$Register, $index$$Register, shift));
  %}
  ins_pipe(ialu_reg_reg);
%}
10193
// Folds (base + (index << scale)) + disp into a single leal.
// Enabled when VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift count doubles as the addressing-mode scale factor.
    const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leal(result, Address($base$$Register, $index$$Register, shift, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10206
// Two-operand long add, register + register, used when UseAPX is off.
// dst is both an input and the result; the flags register is clobbered.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register sum    = $dst$$Register;
    const Register addend = $src$$Register;
    __ addq(sum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
10220
// Three-operand (ndd) long add, register + register, used when UseAPX is on.
// Clobbers the flags register; both source operands are flagged as
// ndd-demotable.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    const Register rhs = $src2$$Register;
    // NOTE(review): the trailing false is presumably the no_flags argument.
    __ eaddq(sum, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10234
// Two-operand long add of an immL32 immediate to a register, used when UseAPX
// is off. dst is both an input and the result; the flags register is
// clobbered.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    __ addq(sum, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10248
// Three-operand (ndd) long add of an immL32 immediate to a register, used when
// UseAPX is on. Clobbers the flags register; the register source is flagged
// as ndd-demotable.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddq(sum, lhs, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10262
// Three-operand (ndd) long add of an immL32 immediate to a value loaded from
// memory; the LoadL is folded into the instruction. Used when UseAPX is on and
// clobbers the flags register.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL (LoadL src1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register sum  = $dst$$Register;
    const Address  from = $src1$$Address;
    __ eaddq(sum, from, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10276
// Two-operand long add of a memory operand to a register (the LoadL is folded
// into the add). Used when UseAPX is off; clobbers the flags register.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Register sum  = $dst$$Register;
    const Address  from = $src$$Address;
    __ addq(sum, from);
  %}
  ins_pipe(ialu_reg_mem);
%}
10291
// Three-operand (ndd) long add of a register and a memory operand (folded
// LoadL). Used when UseAPX is on; clobbers the flags register.
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register sum  = $dst$$Register;
    const Register lhs  = $src1$$Register;
    const Address  from = $src2$$Address;
    __ eaddq(sum, lhs, from, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10306
// Read-modify-write long add: matches a StoreL of (LoadL dst + src) back to
// the same memory operand and emits a single add to memory. Clobbers the flags
// register.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Address  slot   = $dst$$Address;
    const Register addend = $src$$Register;
    __ addq(slot, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10320
// Read-modify-write long add of an immL32 immediate: matches a StoreL of
// (LoadL dst + src) back to the same memory operand and emits a single add to
// memory. Clobbers the flags register.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq $dst, $src\t# long" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ addq(slot, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10334
// Long add of the constant matched by immL1, emitted through incrementq.
// Requires UseIncDec and is used only when UseAPX is off; clobbers the flags
// register.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10347
// Two-register (ndd) long increment: dst = src + the constant matched by
// immL1, emitted as eincq. Requires UseAPX and UseIncDec; clobbers the flags
// register.
//
// The source operand is a long register (rRegL): it is the first input of an
// AddL and is passed to the 64-bit eincq, mirroring decL_rReg_ndd. Declaring
// it as rRegI would leave the rule unable to match a long input.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10361
// (ndd) long increment of a value loaded from memory into a register: the
// LoadL is folded into eincq. Requires UseAPX and UseIncDec; clobbers the
// flags register.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "eincq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  from   = $src$$Address;
    __ eincq(result, from, false);
  %}
  ins_pipe(ialu_reg);
%}
10374
// Read-modify-write long increment in memory: matches a StoreL of
// (LoadL dst + immL1) back to the same memory operand. Requires UseIncDec;
// clobbers the flags register.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq $dst\t# long" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ incrementq(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10388
// Long decrement of a register, emitted through decrementq. The rule matches
// AddL with the immL_M1 operand rather than a SubL node.
// XXX presumably because the ideal graph represents x - 1 as x + (-1); confirm.
// Requires UseIncDec and is used only when UseAPX is off; clobbers the flags
// register.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10402
// Two-register (ndd) long decrement: dst = src + the constant matched by
// immL_M1. Requires UseAPX and UseIncDec; clobbers the flags register.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ edecq(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10416
// (ndd) long decrement of a value loaded from memory into a register: the
// LoadL is folded into edecq. Requires UseAPX and UseIncDec; clobbers the
// flags register.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL (LoadL src) val));
  effect(KILL cr);

  format %{ "edecq $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Address  from   = $src$$Address;
    __ edecq(result, from, false);
  %}
  ins_pipe(ialu_reg);
%}
10429
// Read-modify-write long decrement in memory: matches a StoreL of
// (LoadL dst + immL_M1) back to the same memory operand. As with decL_rReg the
// rule is written in terms of AddL rather than SubL.
// XXX presumably because the ideal graph represents x - 1 as x + (-1); confirm.
// Requires UseIncDec; clobbers the flags register.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq $dst\t# long" %}
  ins_encode %{
    const Address slot = $dst$$Address;
    __ decrementq(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10444
// Folds (index << scale) + disp into a single leaq with no base register.
// Enabled when VM_Version::supports_fast_2op_lea() holds. No flags effect is
// declared for this rule.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift count doubles as the addressing-mode scale factor.
    const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leaq(result, Address(noreg, $index$$Register, shift, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10457
// Folds (base + index) + disp into a single leaq with an unscaled index.
// Enabled when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register first  = $base$$Register;
    const Register second = $index$$Register;
    __ leaq(result, Address(first, second, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10469
// Folds base + (index << scale) into a single leaq without displacement.
// The base operand is restricted to the no_rbp_r13_RegL class. Enabled when
// VM_Version::supports_fast_2op_lea() holds.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift count doubles as the addressing-mode scale factor.
    const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leaq(result, Address($base$$Register, $index$$Register, shift));
  %}
  ins_pipe(ialu_reg_reg);
%}
10482
// Folds (base + (index << scale)) + disp into a single leaq.
// Enabled when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift count doubles as the addressing-mode scale factor.
    const Address::ScaleFactor shift = static_cast<Address::ScaleFactor>($scale$$constant);
    const Register result = $dst$$Register;
    __ leaq(result, Address($base$$Register, $index$$Register, shift, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}
10495
// Pointer add: adds a long register offset to a pointer register in place
// using addq. Clobbers the flags register.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    const Register offset  = $src$$Register;
    __ addq(pointer, offset);
  %}
  ins_pipe(ialu_reg_reg);
%}
10508
// Pointer add of an immL32 immediate offset to a pointer register in place
// using addq. Clobbers the flags register.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    __ addq(pointer, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10521
10522 // XXX addP mem ops ????
10523
// CheckCastPP on a pointer register. The rule declares size(0) and an empty
// encoding, so no machine code is emitted; dst is both input and result.
instruct checkCastPP(rRegP dst)
%{
match(Set dst (CheckCastPP dst));

size(0);
format %{ "# checkcastPP of $dst" %}
ins_encode(/* empty encoding */);
ins_pipe(empty);
%}
10533
// CastPP on a pointer register. The rule declares size(0) and an empty
// encoding, so no machine code is emitted; dst is both input and result.
instruct castPP(rRegP dst)
%{
match(Set dst (CastPP dst));

size(0);
format %{ "# castPP of $dst" %}
ins_encode(/* empty encoding */);
ins_pipe(empty);
%}
10543
// CastII when VerifyConstraintCasts is 0: zero-size, zero-cost rule with an
// empty encoding. The checked variant, castII_checked, handles the case
// VerifyConstraintCasts > 0.
instruct castII(rRegI dst)
%{
predicate(VerifyConstraintCasts == 0);
match(Set dst (CastII dst));

size(0);
format %{ "# castII of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
10555
// CastII when VerifyConstraintCasts > 0: instead of emitting nothing, calls
// verify_int_in_range with this node's index, its int bottom type and the
// register holding the value. Clobbers the flags register.
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const TypeInt* const expected = bottom_type()->is_int();
    const Register value = $dst$$Register;
    __ verify_int_in_range(_idx, expected, value);
  %}
  ins_pipe(pipe_slow);
%}
10568
// CastLL when VerifyConstraintCasts is 0: zero-size, zero-cost rule with an
// empty encoding. The checked variants handle VerifyConstraintCasts > 0.
instruct castLL(rRegL dst)
%{
predicate(VerifyConstraintCasts == 0);
match(Set dst (CastLL dst));

size(0);
format %{ "# castLL of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
10580
// CastLL when VerifyConstraintCasts > 0 and castLL_is_imm32(n) holds: calls
// verify_long_in_range with noreg in the temp-register position, so no TEMP
// operand is needed. Clobbers the flags register.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const TypeLong* const expected = bottom_type()->is_long();
    const Register value = $dst$$Register;
    __ verify_long_in_range(_idx, expected, value, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10593
// CastLL when VerifyConstraintCasts > 0 and castLL_is_imm32(n) does not hold:
// calls verify_long_in_range with an extra TEMP long register. Clobbers the
// flags register.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const TypeLong* const expected = bottom_type()->is_long();
    const Register value   = $dst$$Register;
    const Register scratch = $tmp$$Register;
    __ verify_long_in_range(_idx, expected, value, scratch);
  %}
  ins_pipe(pipe_slow);
%}
10606
// CastFF on a float register: zero-size, zero-cost rule with an empty
// encoding, so no machine code is emitted.
instruct castFF(regF dst)
%{
match(Set dst (CastFF dst));

size(0);
format %{ "# castFF of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
10617
// CastHH: zero-size, zero-cost rule with an empty encoding, so no machine code
// is emitted. The operand uses the regF register class.
instruct castHH(regF dst)
%{
match(Set dst (CastHH dst));

size(0);
format %{ "# castHH of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
10628
// CastDD on a double register: zero-size, zero-cost rule with an empty
// encoding, so no machine code is emitted.
instruct castDD(regD dst)
%{
match(Set dst (CastDD dst));

size(0);
format %{ "# castDD of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
10639
// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
//
// Pointer compare-and-swap producing an int success flag in res. Matches both
// CompareAndSwapP and WeakCompareAndSwapP, but only when the node carries no
// barrier data. The expected value is pinned to rax (rax_RegP) and is killed,
// as are the flags. Emits lock cmpxchgq followed by setcc on the equal
// condition.
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    const Address  location    = $mem_ptr$$Address;
    const Register success     = $res$$Register;
    __ lock();
    __ cmpxchgq(replacement, location);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10661
// CompareAndSwapL / WeakCompareAndSwapL (no predicate). Emits lock + cmpxchgq
// with the compare value in the rax_RegL operand, then setcc(equal) into $res.
// Both the flags and oldval are declared killed.
10662 instruct compareAndSwapL(rRegI res,
10663 memory mem_ptr,
10664 rax_RegL oldval, rRegL newval,
10665 rFlagsReg cr)
10666 %{
10667 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
10668 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
10669 effect(KILL cr, KILL oldval);
10670
10671 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10672 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10673 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10674 ins_encode %{
10675 __ lock();
10676 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10677 __ setcc(Assembler::equal, $res$$Register);
10678 %}
10679 ins_pipe( pipe_cmpxchg );
10680 %}
10681
// CompareAndSwapI / WeakCompareAndSwapI. Emits lock + cmpxchgl with the
// compare value in the rax_RegI operand, then setcc(equal) into $res.
// Both the flags and oldval are declared killed.
10682 instruct compareAndSwapI(rRegI res,
10683 memory mem_ptr,
10684 rax_RegI oldval, rRegI newval,
10685 rFlagsReg cr)
10686 %{
10687 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
10688 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
10689 effect(KILL cr, KILL oldval);
10690
10691 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10692 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10693 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10694 ins_encode %{
10695 __ lock();
10696 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10697 __ setcc(Assembler::equal, $res$$Register);
10698 %}
10699 ins_pipe( pipe_cmpxchg );
10700 %}
10701
// CompareAndSwapB / WeakCompareAndSwapB. Emits lock + cmpxchgb (byte-sized
// compare-and-exchange) with the compare value in the rax_RegI operand, then
// setcc(equal) into $res. Both the flags and oldval are declared killed.
10702 instruct compareAndSwapB(rRegI res,
10703 memory mem_ptr,
10704 rax_RegI oldval, rRegI newval,
10705 rFlagsReg cr)
10706 %{
10707 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
10708 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
10709 effect(KILL cr, KILL oldval);
10710
10711 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10712 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10713 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10714 ins_encode %{
10715 __ lock();
10716 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10717 __ setcc(Assembler::equal, $res$$Register);
10718 %}
10719 ins_pipe( pipe_cmpxchg );
10720 %}
10721
// CompareAndSwapS / WeakCompareAndSwapS. Emits lock + cmpxchgw (16-bit
// compare-and-exchange) with the compare value in the rax_RegI operand, then
// setcc(equal) into $res. Both the flags and oldval are declared killed.
10722 instruct compareAndSwapS(rRegI res,
10723 memory mem_ptr,
10724 rax_RegI oldval, rRegI newval,
10725 rFlagsReg cr)
10726 %{
10727 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
10728 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
10729 effect(KILL cr, KILL oldval);
10730
10731 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10732 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10733 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10734 ins_encode %{
10735 __ lock();
10736 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10737 __ setcc(Assembler::equal, $res$$Register);
10738 %}
10739 ins_pipe( pipe_cmpxchg );
10740 %}
10741
// CompareAndSwapN / WeakCompareAndSwapN (rRegN/rax_RegN operands), restricted
// to nodes whose barrier_data() is 0. Emits lock + cmpxchgl, then
// setcc(equal) into $res. Both the flags and oldval are declared killed.
10742 instruct compareAndSwapN(rRegI res,
10743 memory mem_ptr,
10744 rax_RegN oldval, rRegN newval,
10745 rFlagsReg cr) %{
10746 predicate(n->as_LoadStore()->barrier_data() == 0);
10747 match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
10748 match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
10749 effect(KILL cr, KILL oldval);
10750
10751 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10752 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
10753 "setcc $res \t# emits sete + movzbl or setzue for APX" %}
10754 ins_encode %{
10755 __ lock();
10756 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10757 __ setcc(Assembler::equal, $res$$Register);
10758 %}
10759 ins_pipe( pipe_cmpxchg );
10760 %}
10761
// CompareAndExchangeB. Unlike the CompareAndSwap rules there is no boolean
// result: the rule defines oldval (rax_RegI) itself, so the value left in that
// register by lock + cmpxchgb is the node's result. Flags are declared killed.
10762 instruct compareAndExchangeB(
10763 memory mem_ptr,
10764 rax_RegI oldval, rRegI newval,
10765 rFlagsReg cr)
10766 %{
10767 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
10768 effect(KILL cr);
10769
10770 format %{ "cmpxchgb $mem_ptr,$newval\t# "
10771 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10772 ins_encode %{
10773 __ lock();
10774 __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
10775 %}
10776 ins_pipe( pipe_cmpxchg );
10777 %}
10778
// CompareAndExchangeS. The rule defines oldval (rax_RegI) as its result and
// emits lock + cmpxchgw. Flags are declared killed.
10779 instruct compareAndExchangeS(
10780 memory mem_ptr,
10781 rax_RegI oldval, rRegI newval,
10782 rFlagsReg cr)
10783 %{
10784 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
10785 effect(KILL cr);
10786
10787 format %{ "cmpxchgw $mem_ptr,$newval\t# "
10788 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10789 ins_encode %{
10790 __ lock();
10791 __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
10792 %}
10793 ins_pipe( pipe_cmpxchg );
10794 %}
10795
// CompareAndExchangeI. The rule defines oldval (rax_RegI) as its result and
// emits lock + cmpxchgl. Flags are declared killed.
10796 instruct compareAndExchangeI(
10797 memory mem_ptr,
10798 rax_RegI oldval, rRegI newval,
10799 rFlagsReg cr)
10800 %{
10801 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
10802 effect(KILL cr);
10803
10804 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10805 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10806 ins_encode %{
10807 __ lock();
10808 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10809 %}
10810 ins_pipe( pipe_cmpxchg );
10811 %}
10812
// CompareAndExchangeL. The rule defines oldval (rax_RegL) as its result and
// emits lock + cmpxchgq. Flags are declared killed.
10813 instruct compareAndExchangeL(
10814 memory mem_ptr,
10815 rax_RegL oldval, rRegL newval,
10816 rFlagsReg cr)
10817 %{
10818 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
10819 effect(KILL cr);
10820
10821 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10822 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10823 ins_encode %{
10824 __ lock();
10825 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10826 %}
10827 ins_pipe( pipe_cmpxchg );
10828 %}
10829
// CompareAndExchangeN (rax_RegN/rRegN operands), restricted to nodes whose
// barrier_data() is 0. The rule defines oldval as its result and emits
// lock + cmpxchgl. Flags are declared killed.
10830 instruct compareAndExchangeN(
10831 memory mem_ptr,
10832 rax_RegN oldval, rRegN newval,
10833 rFlagsReg cr) %{
10834 predicate(n->as_LoadStore()->barrier_data() == 0);
10835 match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
10836 effect(KILL cr);
10837
10838 format %{ "cmpxchgl $mem_ptr,$newval\t# "
10839 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10840 ins_encode %{
10841 __ lock();
10842 __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
10843 %}
10844 ins_pipe( pipe_cmpxchg );
10845 %}
10846
// CompareAndExchangeP, restricted to nodes whose barrier_data() is 0. The rule
// defines oldval (rax_RegP) as its result and emits lock + cmpxchgq. Flags are
// declared killed.
10847 instruct compareAndExchangeP(
10848 memory mem_ptr,
10849 rax_RegP oldval, rRegP newval,
10850 rFlagsReg cr)
10851 %{
10852 predicate(n->as_LoadStore()->barrier_data() == 0);
10853 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
10854 effect(KILL cr);
10855
10856 format %{ "cmpxchgq $mem_ptr,$newval\t# "
10857 "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
10858 ins_encode %{
10859 __ lock();
10860 __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
10861 %}
10862 ins_pipe( pipe_cmpxchg );
10863 %}
10864
// GetAndAddB whose result is unused (result_not_used()), register addend.
// Emits lock + addb to memory rather than an xadd; the Universe operand
// `dummy` stands in for the result that nobody reads.
10865 instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10866 predicate(n->as_LoadStore()->result_not_used());
10867 match(Set dummy (GetAndAddB mem add));
10868 effect(KILL cr);
10869 format %{ "addb_lock $mem, $add" %}
10870 ins_encode %{
10871 __ lock();
10872 __ addb($mem$$Address, $add$$Register);
10873 %}
10874 ins_pipe(pipe_cmpxchg);
10875 %}
10876
// GetAndAddB whose result is unused (result_not_used()), immediate addend.
// Emits lock + addb of the constant to memory.
10877 instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10878 predicate(n->as_LoadStore()->result_not_used());
10879 match(Set dummy (GetAndAddB mem add));
10880 effect(KILL cr);
10881 format %{ "addb_lock $mem, $add" %}
10882 ins_encode %{
10883 __ lock();
10884 __ addb($mem$$Address, $add$$constant);
10885 %}
10886 ins_pipe(pipe_cmpxchg);
10887 %}
10888
// GetAndAddB whose result is used. Emits lock + xaddb, with $newval serving as
// both the addend and the result register, then narrow_subword_type(...,
// T_BYTE) on the result (the format text describes this as "$newval -> byte").
10889 instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
10890 predicate(!n->as_LoadStore()->result_not_used());
10891 match(Set newval (GetAndAddB mem newval));
10892 effect(KILL cr);
10893 format %{ "xaddb_lock $mem, $newval\t# $newval -> byte" %}
10894 ins_encode %{
10895 __ lock();
10896 __ xaddb($mem$$Address, $newval$$Register);
10897 __ narrow_subword_type($newval$$Register, T_BYTE);
10898 %}
10899 ins_pipe(pipe_cmpxchg);
10900 %}
10901
// GetAndAddS whose result is unused (result_not_used()), register addend.
// Emits lock + addw to memory; `dummy` stands in for the unused result.
10902 instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10903 predicate(n->as_LoadStore()->result_not_used());
10904 match(Set dummy (GetAndAddS mem add));
10905 effect(KILL cr);
10906 format %{ "addw_lock $mem, $add" %}
10907 ins_encode %{
10908 __ lock();
10909 __ addw($mem$$Address, $add$$Register);
10910 %}
10911 ins_pipe(pipe_cmpxchg);
10912 %}
10913
// GetAndAddS whose result is unused, immediate addend. In addition to
// result_not_used() this rule requires UseStoreImmI16; otherwise it is not
// selectable. Emits lock + addw of the constant to memory.
10914 instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10915 predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
10916 match(Set dummy (GetAndAddS mem add));
10917 effect(KILL cr);
10918 format %{ "addw_lock $mem, $add" %}
10919 ins_encode %{
10920 __ lock();
10921 __ addw($mem$$Address, $add$$constant);
10922 %}
10923 ins_pipe(pipe_cmpxchg);
10924 %}
10925
// GetAndAddS whose result is used. Emits lock + xaddw with $newval as addend
// and result, then narrow_subword_type(..., T_SHORT) on the result (the format
// text describes this as "$newval -> short").
10926 instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
10927 predicate(!n->as_LoadStore()->result_not_used());
10928 match(Set newval (GetAndAddS mem newval));
10929 effect(KILL cr);
10930 format %{ "xaddw_lock $mem, $newval\t# $newval -> short" %}
10931 ins_encode %{
10932 __ lock();
10933 __ xaddw($mem$$Address, $newval$$Register);
10934 __ narrow_subword_type($newval$$Register, T_SHORT);
10935 %}
10936 ins_pipe(pipe_cmpxchg);
10937 %}
10938
// GetAndAddI whose result is unused (result_not_used()), register addend.
// Emits lock + addl to memory; `dummy` stands in for the unused result.
10939 instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
10940 predicate(n->as_LoadStore()->result_not_used());
10941 match(Set dummy (GetAndAddI mem add));
10942 effect(KILL cr);
10943 format %{ "addl_lock $mem, $add" %}
10944 ins_encode %{
10945 __ lock();
10946 __ addl($mem$$Address, $add$$Register);
10947 %}
10948 ins_pipe(pipe_cmpxchg);
10949 %}
10950
// GetAndAddI whose result is unused (result_not_used()), immediate addend.
// Emits lock + addl of the constant to memory.
10951 instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
10952 predicate(n->as_LoadStore()->result_not_used());
10953 match(Set dummy (GetAndAddI mem add));
10954 effect(KILL cr);
10955 format %{ "addl_lock $mem, $add" %}
10956 ins_encode %{
10957 __ lock();
10958 __ addl($mem$$Address, $add$$constant);
10959 %}
10960 ins_pipe(pipe_cmpxchg);
10961 %}
10962
// GetAndAddI whose result is used. Emits lock + xaddl with $newval as both the
// addend and the result register. No narrowing step follows.
10963 instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
10964 predicate(!n->as_LoadStore()->result_not_used());
10965 match(Set newval (GetAndAddI mem newval));
10966 effect(KILL cr);
10967 format %{ "xaddl_lock $mem, $newval" %}
10968 ins_encode %{
10969 __ lock();
10970 __ xaddl($mem$$Address, $newval$$Register);
10971 %}
10972 ins_pipe(pipe_cmpxchg);
10973 %}
10974
// GetAndAddL whose result is unused (result_not_used()), register addend.
// Emits lock + addq to memory; `dummy` stands in for the unused result.
10975 instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
10976 predicate(n->as_LoadStore()->result_not_used());
10977 match(Set dummy (GetAndAddL mem add));
10978 effect(KILL cr);
10979 format %{ "addq_lock $mem, $add" %}
10980 ins_encode %{
10981 __ lock();
10982 __ addq($mem$$Address, $add$$Register);
10983 %}
10984 ins_pipe(pipe_cmpxchg);
10985 %}
10986
// GetAndAddL whose result is unused (result_not_used()), immediate addend
// limited to the immL32 operand class. Emits lock + addq of the constant.
10987 instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
10988 predicate(n->as_LoadStore()->result_not_used());
10989 match(Set dummy (GetAndAddL mem add));
10990 effect(KILL cr);
10991 format %{ "addq_lock $mem, $add" %}
10992 ins_encode %{
10993 __ lock();
10994 __ addq($mem$$Address, $add$$constant);
10995 %}
10996 ins_pipe(pipe_cmpxchg);
10997 %}
10998
// GetAndAddL whose result is used. Emits lock + xaddq with $newval as both the
// addend and the result register.
10999 instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
11000 predicate(!n->as_LoadStore()->result_not_used());
11001 match(Set newval (GetAndAddL mem newval));
11002 effect(KILL cr);
11003 format %{ "xaddq_lock $mem, $newval" %}
11004 ins_encode %{
11005 __ lock();
11006 __ xaddq($mem$$Address, $newval$$Register);
11007 %}
11008 ins_pipe(pipe_cmpxchg);
11009 %}
11010
// GetAndSetB. Emits xchgb between $newval and memory, then
// narrow_subword_type(..., T_BYTE) on the value that came back. This rule
// emits no lock() call and declares no flags effect.
11011 instruct xchgB( memory mem, rRegI newval) %{
11012 match(Set newval (GetAndSetB mem newval));
11013 format %{ "XCHGB $newval,[$mem]\t# $newval -> byte" %}
11014 ins_encode %{
11015 __ xchgb($newval$$Register, $mem$$Address);
11016 __ narrow_subword_type($newval$$Register, T_BYTE);
11017 %}
11018 ins_pipe( pipe_cmpxchg );
11019 %}
11020
// GetAndSetS. Emits xchgw between $newval and memory, then
// narrow_subword_type(..., T_SHORT) on the value that came back. This rule
// emits no lock() call and declares no flags effect.
11021 instruct xchgS( memory mem, rRegI newval) %{
11022 match(Set newval (GetAndSetS mem newval));
11023 format %{ "XCHGW $newval,[$mem]\t# $newval -> short" %}
11024 ins_encode %{
11025 __ xchgw($newval$$Register, $mem$$Address);
11026 __ narrow_subword_type($newval$$Register, T_SHORT);
11027 %}
11028 ins_pipe( pipe_cmpxchg );
11029 %}
11030
// GetAndSetI. Emits xchgl between $newval and memory; $newval is both the
// value stored and the register receiving the result.
11031 instruct xchgI( memory mem, rRegI newval) %{
11032 match(Set newval (GetAndSetI mem newval));
11033 format %{ "XCHGL $newval,[$mem]" %}
11034 ins_encode %{
11035 __ xchgl($newval$$Register, $mem$$Address);
11036 %}
11037 ins_pipe( pipe_cmpxchg );
11038 %}
11039
// GetAndSetL. Emits xchgq between $newval and memory; $newval is both the
// value stored and the register receiving the result. The format mnemonic is
// XCHGQ so the printed text matches the 64-bit instruction actually encoded.
11040 instruct xchgL( memory mem, rRegL newval) %{
11041 match(Set newval (GetAndSetL mem newval));
11042 format %{ "XCHGQ $newval,[$mem]" %}
11043 ins_encode %{
11044 __ xchgq($newval$$Register, $mem$$Address);
11045 %}
11046 ins_pipe( pipe_cmpxchg );
11047 %}
11048
// GetAndSetP, restricted to nodes whose barrier_data() is 0. Emits xchgq
// between $newval and memory; $newval also receives the result.
11049 instruct xchgP( memory mem, rRegP newval) %{
11050 match(Set newval (GetAndSetP mem newval));
11051 predicate(n->as_LoadStore()->barrier_data() == 0);
11052 format %{ "XCHGQ $newval,[$mem]" %}
11053 ins_encode %{
11054 __ xchgq($newval$$Register, $mem$$Address);
11055 %}
11056 ins_pipe( pipe_cmpxchg );
11057 %}
11058
// GetAndSetN (rRegN operand), restricted to nodes whose barrier_data() is 0.
// Emits xchgl between $newval and memory; $newval also receives the result.
// The format text brackets the memory operand like the sibling xchg rules.
11059 instruct xchgN( memory mem, rRegN newval) %{
11060 predicate(n->as_LoadStore()->barrier_data() == 0);
11061 match(Set newval (GetAndSetN mem newval));
11062 format %{ "XCHGL $newval,[$mem]" %}
11063 ins_encode %{
11064 __ xchgl($newval$$Register, $mem$$Address);
11065 %}
11066 ins_pipe( pipe_cmpxchg );
11067 %}
11068
11069 //----------Abs Instructions-------------------------------------------
11070
11071 // Integer Absolute Instructions
// AbsI. Computes dst = 0 - src (xorl to clear dst, then subl), then moves src
// into dst when the `less` condition holds after the subtraction. dst is
// declared TEMP because it is written before src is last read; flags are
// killed.
11072 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11073 %{
11074 match(Set dst (AbsI src));
11075 effect(TEMP dst, KILL cr);
11076 format %{ "xorl $dst, $dst\t# abs int\n\t"
11077 "subl $dst, $src\n\t"
11078 "cmovll $dst, $src" %}
11079 ins_encode %{
11080 __ xorl($dst$$Register, $dst$$Register);
11081 __ subl($dst$$Register, $src$$Register);
11082 __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11083 %}
11084
11085 ins_pipe(ialu_reg_reg);
11086 %}
11087
11088 // Long Absolute Instructions
// AbsL. Same shape as the int rule: clear dst with xorl, compute
// dst = 0 - src with subq, then cmovq src into dst on the `less` condition.
// dst is TEMP because it is written before src is last read; flags are killed.
// NOTE(review): the clear uses the 32-bit xorl while the rest is 64-bit;
// presumably this relies on 32-bit writes zeroing the upper half - confirm.
11089 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11090 %{
11091 match(Set dst (AbsL src));
11092 effect(TEMP dst, KILL cr);
11093 format %{ "xorl $dst, $dst\t# abs long\n\t"
11094 "subq $dst, $src\n\t"
11095 "cmovlq $dst, $src" %}
11096 ins_encode %{
11097 __ xorl($dst$$Register, $dst$$Register);
11098 __ subq($dst$$Register, $src$$Register);
11099 __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11100 %}
11101
11102 ins_pipe(ialu_reg_reg);
11103 %}
11104
11105 //----------Subtraction Instructions-------------------------------------------
11106
11107 // Integer Subtraction Instructions
// SubI, register - register, two-operand form (dst is also the left input).
// Only selectable when UseAPX is off. The flag(...) list names the condition
// flags recorded for this instruction: overflow, sign, zero, carry, parity.
11108 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11109 %{
11110 predicate(!UseAPX);
11111 match(Set dst (SubI dst src));
11112 effect(KILL cr);
11113 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11114
11115 format %{ "subl $dst, $src\t# int" %}
11116 ins_encode %{
11117 __ subl($dst$$Register, $src$$Register);
11118 %}
11119 ins_pipe(ialu_reg_reg);
11120 %}
11121
// SubI, register - register, three-operand ("ndd") form used when UseAPX is
// on: dst is distinct from src1/src2. Tagged Flag_ndd_demotable_opr1 in
// addition to the condition-flag set.
// NOTE(review): the trailing `false` passed to esubl is an assembler option
// whose meaning is not visible here - confirm against the assembler.
11122 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11123 %{
11124 predicate(UseAPX);
11125 match(Set dst (SubI src1 src2));
11126 effect(KILL cr);
11127 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11128
11129 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11130 ins_encode %{
11131 __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11132 %}
11133 ins_pipe(ialu_reg_reg);
11134 %}
11135
// SubI, register - immediate, three-operand ("ndd") form used when UseAPX is
// on. Tagged Flag_ndd_demotable_opr1 in addition to the condition-flag set.
11136 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11137 %{
11138 predicate(UseAPX);
11139 match(Set dst (SubI src1 src2));
11140 effect(KILL cr);
11141 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11142
11143 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11144 ins_encode %{
11145 __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11146 %}
11147 ins_pipe(ialu_reg_reg);
11148 %}
11149
// SubI, (LoadI memory) - immediate into a register, "ndd" form used when
// UseAPX is on. The load is folded into the instruction's memory operand.
// No ndd_demotable flag is set on this rule.
11150 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11151 %{
11152 predicate(UseAPX);
11153 match(Set dst (SubI (LoadI src1) src2));
11154 effect(KILL cr);
11155 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11156
11157 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11158 ins_encode %{
11159 __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11160 %}
11161 ins_pipe(ialu_reg_reg);
11162 %}
11163
// SubI, register - (LoadI memory), two-operand form with the load folded into
// the subl memory operand. Only selectable when UseAPX is off; cost 150.
11164 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11165 %{
11166 predicate(!UseAPX);
11167 match(Set dst (SubI dst (LoadI src)));
11168 effect(KILL cr);
11169 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11170
11171 ins_cost(150);
11172 format %{ "subl $dst, $src\t# int" %}
11173 ins_encode %{
11174 __ subl($dst$$Register, $src$$Address);
11175 %}
11176 ins_pipe(ialu_reg_mem);
11177 %}
11178
// SubI, register - (LoadI memory), "ndd" form used when UseAPX is on; cost
// 150. Tagged Flag_ndd_demotable_opr1 (operand 1 is the register src1).
11179 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11180 %{
11181 predicate(UseAPX);
11182 match(Set dst (SubI src1 (LoadI src2)));
11183 effect(KILL cr);
11184 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11185
11186 ins_cost(150);
11187 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11188 ins_encode %{
11189 __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11190 %}
11191 ins_pipe(ialu_reg_mem);
11192 %}
11193
// SubI, (LoadI memory) - register, "ndd" form used when UseAPX is on; cost
// 150. The memory operand is the minuend, so no ndd_demotable flag is set.
11194 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11195 %{
11196 predicate(UseAPX);
11197 match(Set dst (SubI (LoadI src1) src2));
11198 effect(KILL cr);
11199 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11200
11201 ins_cost(150);
11202 format %{ "esubl $dst, $src1, $src2\t# int ndd" %}
11203 ins_encode %{
11204 __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11205 %}
11206 ins_pipe(ialu_reg_mem);
11207 %}
11208
// Read-modify-write SubI: matches StoreI(dst, SubI(LoadI(dst), src)) on the
// same memory operand and emits a single subl with a memory destination.
// No UseAPX predicate; cost 150.
11209 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11210 %{
11211 match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11212 effect(KILL cr);
11213 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11214
11215 ins_cost(150);
11216 format %{ "subl $dst, $src\t# int" %}
11217 ins_encode %{
11218 __ subl($dst$$Address, $src$$Register);
11219 %}
11220 ins_pipe(ialu_mem_reg);
11221 %}
11222
// SubL, register - register, two-operand form (dst is also the left input).
// Only selectable when UseAPX is off. Records overflow, sign, zero, carry and
// parity as flags set by the instruction.
11223 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11224 %{
11225 predicate(!UseAPX);
11226 match(Set dst (SubL dst src));
11227 effect(KILL cr);
11228 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11229
11230 format %{ "subq $dst, $src\t# long" %}
11231 ins_encode %{
11232 __ subq($dst$$Register, $src$$Register);
11233 %}
11234 ins_pipe(ialu_reg_reg);
11235 %}
11236
// SubL, register - register, three-operand ("ndd") form used when UseAPX is
// on. Tagged Flag_ndd_demotable_opr1 in addition to the condition-flag set.
11237 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11238 %{
11239 predicate(UseAPX);
11240 match(Set dst (SubL src1 src2));
11241 effect(KILL cr);
11242 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11243
11244 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11245 ins_encode %{
11246 __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11247 %}
11248 ins_pipe(ialu_reg_reg);
11249 %}
11250
// SubL, register - immediate (immL32 operand class), "ndd" form used when
// UseAPX is on. Tagged Flag_ndd_demotable_opr1.
11251 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11252 %{
11253 predicate(UseAPX);
11254 match(Set dst (SubL src1 src2));
11255 effect(KILL cr);
11256 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11257
11258 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11259 ins_encode %{
11260 __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11261 %}
11262 ins_pipe(ialu_reg_reg);
11263 %}
11264
// SubL, (LoadL memory) - immediate (immL32) into a register, "ndd" form used
// when UseAPX is on. No ndd_demotable flag is set on this rule.
11265 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11266 %{
11267 predicate(UseAPX);
11268 match(Set dst (SubL (LoadL src1) src2));
11269 effect(KILL cr);
11270 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11271
11272 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11273 ins_encode %{
11274 __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11275 %}
11276 ins_pipe(ialu_reg_reg);
11277 %}
11278
// SubL, register - (LoadL memory), two-operand form with the load folded into
// the subq memory operand. Only selectable when UseAPX is off; cost 150.
11279 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11280 %{
11281 predicate(!UseAPX);
11282 match(Set dst (SubL dst (LoadL src)));
11283 effect(KILL cr);
11284 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11285
11286 ins_cost(150);
11287 format %{ "subq $dst, $src\t# long" %}
11288 ins_encode %{
11289 __ subq($dst$$Register, $src$$Address);
11290 %}
11291 ins_pipe(ialu_reg_mem);
11292 %}
11293
// SubL, register - (LoadL memory), "ndd" form used when UseAPX is on; cost
// 150. Tagged Flag_ndd_demotable_opr1 (operand 1 is the register src1).
11294 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11295 %{
11296 predicate(UseAPX);
11297 match(Set dst (SubL src1 (LoadL src2)));
11298 effect(KILL cr);
11299 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11300
11301 ins_cost(150);
11302 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11303 ins_encode %{
11304 __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11305 %}
11306 ins_pipe(ialu_reg_mem);
11307 %}
11308
// SubL, (LoadL memory) - register, "ndd" form used when UseAPX is on; cost
// 150. The memory operand is the minuend, so no ndd_demotable flag is set.
11309 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11310 %{
11311 predicate(UseAPX);
11312 match(Set dst (SubL (LoadL src1) src2));
11313 effect(KILL cr);
11314 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11315
11316 ins_cost(150);
11317 format %{ "esubq $dst, $src1, $src2\t# long ndd" %}
11318 ins_encode %{
11319 __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11320 %}
11321 ins_pipe(ialu_reg_mem);
11322 %}
11323
// Read-modify-write SubL: matches StoreL(dst, SubL(LoadL(dst), src)) on the
// same memory operand and emits a single subq with a memory destination.
// No UseAPX predicate; cost 150.
11324 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11325 %{
11326 match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11327 effect(KILL cr);
11328 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11329
11330 ins_cost(150);
11331 format %{ "subq $dst, $src\t# long" %}
11332 ins_encode %{
11333 __ subq($dst$$Address, $src$$Register);
11334 %}
11335 ins_pipe(ialu_mem_reg);
11336 %}
11337
11338 // Subtract from a pointer
11339 // XXX hmpf???
// Matches AddP(dst, SubI(0, src)) - i.e. adding a negated int to a pointer -
// and emits it as a single subq of $src from $dst.
// NOTE(review): $src is an rRegI but is used as a full 64-bit subq operand;
// confirm what the upper 32 bits of $src hold at this point.
11340 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11341 %{
11342 match(Set dst (AddP dst (SubI zero src)));
11343 effect(KILL cr);
11344
11345 format %{ "subq $dst, $src\t# ptr - int" %}
11346 ins_encode %{
11347 __ subq($dst$$Register, $src$$Register);
11348 %}
11349 ins_pipe(ialu_reg_reg);
11350 %}
11351
// Int negate expressed as SubI(0, dst): emits negl in place. Only selectable
// when UseAPX is off. Records overflow, sign, zero and parity as flags set
// (carry is not in the list).
11352 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11353 %{
11354 predicate(!UseAPX);
11355 match(Set dst (SubI zero dst));
11356 effect(KILL cr);
11357 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11358
11359 format %{ "negl $dst\t# int" %}
11360 ins_encode %{
11361 __ negl($dst$$Register);
11362 %}
11363 ins_pipe(ialu_reg);
11364 %}
11365
// Int negate expressed as SubI(0, src), "ndd" form used when UseAPX is on:
// enegl writes dst from a separate src. Tagged Flag_ndd_demotable_opr2, since
// the register input is the second operand of the matched SubI.
11366 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11367 %{
11368 predicate(UseAPX);
11369 match(Set dst (SubI zero src));
11370 effect(KILL cr);
11371 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11372
11373 format %{ "enegl $dst, $src\t# int ndd" %}
11374 ins_encode %{
11375 __ enegl($dst$$Register, $src$$Register, false);
11376 %}
11377 ins_pipe(ialu_reg);
11378 %}
11379
// Int negate expressed as a NegI node: emits negl in place. Only selectable
// when UseAPX is off.
11380 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11381 %{
11382 predicate(!UseAPX);
11383 match(Set dst (NegI dst));
11384 effect(KILL cr);
11385 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11386
11387 format %{ "negl $dst\t# int" %}
11388 ins_encode %{
11389 __ negl($dst$$Register);
11390 %}
11391 ins_pipe(ialu_reg);
11392 %}
11393
// Int negate expressed as a NegI node, "ndd" form used when UseAPX is on.
// Tagged Flag_ndd_demotable_opr1, since src is the only input of NegI.
11394 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11395 %{
11396 predicate(UseAPX);
11397 match(Set dst (NegI src));
11398 effect(KILL cr);
11399 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11400
11401 format %{ "enegl $dst, $src\t# int ndd" %}
11402 ins_encode %{
11403 __ enegl($dst$$Register, $src$$Register, false);
11404 %}
11405 ins_pipe(ialu_reg);
11406 %}
11407
// Read-modify-write int negate: matches StoreI(dst, SubI(0, LoadI(dst))) on
// the same memory operand and emits a single negl on that address.
// No UseAPX predicate.
11408 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11409 %{
11410 match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11411 effect(KILL cr);
11412 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11413
11414 format %{ "negl $dst\t# int" %}
11415 ins_encode %{
11416 __ negl($dst$$Address);
11417 %}
11418 ins_pipe(ialu_reg);
11419 %}
11420
// Long negate expressed as SubL(0, dst): emits negq in place. Only selectable
// when UseAPX is off.
11421 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11422 %{
11423 predicate(!UseAPX);
11424 match(Set dst (SubL zero dst));
11425 effect(KILL cr);
11426 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11427
11428 format %{ "negq $dst\t# long" %}
11429 ins_encode %{
11430 __ negq($dst$$Register);
11431 %}
11432 ins_pipe(ialu_reg);
11433 %}
11434
// Long negate expressed as SubL(0, src), "ndd" form used when UseAPX is on.
// Tagged Flag_ndd_demotable_opr2, since the register input is the second
// operand of the matched SubL.
11435 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11436 %{
11437 predicate(UseAPX);
11438 match(Set dst (SubL zero src));
11439 effect(KILL cr);
11440 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11441
11442 format %{ "enegq $dst, $src\t# long ndd" %}
11443 ins_encode %{
11444 __ enegq($dst$$Register, $src$$Register, false);
11445 %}
11446 ins_pipe(ialu_reg);
11447 %}
11448
// Long negate expressed as a NegL node: emits negq in place. Only selectable
// when UseAPX is off. The format annotation reads "# long" to match the
// 64-bit operand class and the other negL rules.
11449 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11450 %{
11451 predicate(!UseAPX);
11452 match(Set dst (NegL dst));
11453 effect(KILL cr);
11454 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11455
11456 format %{ "negq $dst\t# long" %}
11457 ins_encode %{
11458 __ negq($dst$$Register);
11459 %}
11460 ins_pipe(ialu_reg);
11461 %}
11462
// Long negate expressed as a NegL node, "ndd" form used when UseAPX is on.
// Tagged Flag_ndd_demotable_opr1, since src is the only input of NegL.
11463 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11464 %{
11465 predicate(UseAPX);
11466 match(Set dst (NegL src));
11467 effect(KILL cr);
11468 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11469
11470 format %{ "enegq $dst, $src\t# long ndd" %}
11471 ins_encode %{
11472 __ enegq($dst$$Register, $src$$Register, false);
11473 %}
11474 ins_pipe(ialu_reg);
11475 %}
11476
// Read-modify-write long negate: matches StoreL(dst, SubL(0, LoadL(dst))) on
// the same memory operand and emits a single negq on that address.
// No UseAPX predicate.
11477 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11478 %{
11479 match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11480 effect(KILL cr);
11481 flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11482
11483 format %{ "negq $dst\t# long" %}
11484 ins_encode %{
11485 __ negq($dst$$Address);
11486 %}
11487 ins_pipe(ialu_reg);
11488 %}
11489
11490 //----------Multiplication/Division Instructions-------------------------------
11491 // Integer Multiplication Instructions
11492 // Multiply Register
11493
// MulI, register * register, two-operand form (dst is also the left input).
// Only selectable when UseAPX is off; cost 300. Flags are killed but no
// Flag_sets_* tags are declared for this rule.
11494 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11495 %{
11496 predicate(!UseAPX);
11497 match(Set dst (MulI dst src));
11498 effect(KILL cr);
11499
11500 ins_cost(300);
11501 format %{ "imull $dst, $src\t# int" %}
11502 ins_encode %{
11503 __ imull($dst$$Register, $src$$Register);
11504 %}
11505 ins_pipe(ialu_reg_reg_alu0);
11506 %}
11507
// MulI, register * register, three-operand ("ndd") form used when UseAPX is
// on; cost 300. Both inputs are tagged demotable (opr1 and opr2).
11508 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11509 %{
11510 predicate(UseAPX);
11511 match(Set dst (MulI src1 src2));
11512 effect(KILL cr);
11513 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11514
11515 ins_cost(300);
11516 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11517 ins_encode %{
11518 __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11519 %}
11520 ins_pipe(ialu_reg_reg_alu0);
11521 %}
11522
// MulI, register * immediate, using the three-operand imull form
// (dst, src, constant). No UseAPX predicate, so it applies in both modes;
// cost 300.
11523 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11524 %{
11525 match(Set dst (MulI src imm));
11526 effect(KILL cr);
11527
11528 ins_cost(300);
11529 format %{ "imull $dst, $src, $imm\t# int" %}
11530 ins_encode %{
11531 __ imull($dst$$Register, $src$$Register, $imm$$constant);
11532 %}
11533 ins_pipe(ialu_reg_reg_alu0);
11534 %}
11535
// MulI, register * (LoadI memory), two-operand form with the load folded into
// the imull memory operand. Only selectable when UseAPX is off; cost 350.
11536 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11537 %{
11538 predicate(!UseAPX);
11539 match(Set dst (MulI dst (LoadI src)));
11540 effect(KILL cr);
11541
11542 ins_cost(350);
11543 format %{ "imull $dst, $src\t# int" %}
11544 ins_encode %{
11545 __ imull($dst$$Register, $src$$Address);
11546 %}
11547 ins_pipe(ialu_reg_mem_alu0);
11548 %}
11549
// MulI, register * (LoadI memory), "ndd" form used when UseAPX is on;
// cost 350.
// NOTE(review): src2 is a memory operand here, yet Flag_ndd_demotable_opr2 is
// set alongside opr1 - confirm that tagging a non-register operand as
// demotable is intended.
11550 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11551 %{
11552 predicate(UseAPX);
11553 match(Set dst (MulI src1 (LoadI src2)));
11554 effect(KILL cr);
11555 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11556
11557 ins_cost(350);
11558 format %{ "eimull $dst, $src1, $src2\t# int ndd" %}
11559 ins_encode %{
11560 __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11561 %}
11562 ins_pipe(ialu_reg_mem_alu0);
11563 %}
11564
// MulI, (LoadI memory) * immediate into a register, using the three-operand
// imull form (dst, address, constant). No UseAPX predicate; cost 300.
11565 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11566 %{
11567 match(Set dst (MulI (LoadI src) imm));
11568 effect(KILL cr);
11569
11570 ins_cost(300);
11571 format %{ "imull $dst, $src, $imm\t# int" %}
11572 ins_encode %{
11573 __ imull($dst$$Register, $src$$Address, $imm$$constant);
11574 %}
11575 ins_pipe(ialu_reg_mem_alu0);
11576 %}
11577
// MulAddS2I: dst = dst*src1 + src2*src3. Has no encoding of its own; it
// expands into two mulI_rReg instructions and one addI_rReg. src2 is declared
// killed because the second multiply overwrites it with src2*src3.
11578 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11579 %{
11580 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11581 effect(KILL cr, KILL src2);
11582
11583 expand %{ mulI_rReg(dst, src1, cr);
11584 mulI_rReg(src2, src3, cr);
11585 addI_rReg(dst, src2, cr); %}
11586 %}
11587
// MulL, register * register, two-operand form (dst is also the left input).
// Only selectable when UseAPX is off; cost 300.
11588 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11589 %{
11590 predicate(!UseAPX);
11591 match(Set dst (MulL dst src));
11592 effect(KILL cr);
11593
11594 ins_cost(300);
11595 format %{ "imulq $dst, $src\t# long" %}
11596 ins_encode %{
11597 __ imulq($dst$$Register, $src$$Register);
11598 %}
11599 ins_pipe(ialu_reg_reg_alu0);
11600 %}
11601
// MulL, register * register, three-operand ("ndd") form used when UseAPX is
// on; cost 300. Both inputs are tagged demotable (opr1 and opr2).
11602 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11603 %{
11604 predicate(UseAPX);
11605 match(Set dst (MulL src1 src2));
11606 effect(KILL cr);
11607 flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11608
11609 ins_cost(300);
11610 format %{ "eimulq $dst, $src1, $src2\t# long ndd" %}
11611 ins_encode %{
11612 __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11613 %}
11614 ins_pipe(ialu_reg_reg_alu0);
11615 %}
11616
// MulL, register * immediate (immL32 operand class), using the three-operand
// imulq form (dst, src, constant). No UseAPX predicate; cost 300.
11617 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11618 %{
11619 match(Set dst (MulL src imm));
11620 effect(KILL cr);
11621
11622 ins_cost(300);
11623 format %{ "imulq $dst, $src, $imm\t# long" %}
11624 ins_encode %{
11625 __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11626 %}
11627 ins_pipe(ialu_reg_reg_alu0);
11628 %}
11629
// Long multiply with a memory source, two-operand form: dst = dst * [src].
// Matches MulL(dst, LoadL src) when UseAPX is off; costed slightly higher
// (350) than the register-register form (300). Flags are declared killed.
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11643
// Long multiply, three-operand (NDD) form with a memory source:
// dst = src1 * [src2]. Matches MulL(src1, LoadL src2) when UseAPX is on.
// Only operand 1 is tagged as NDD-demotable: src2 is a memory operand, not a
// register that dst could be allocated to, so an opr2 tag has nothing to
// refer to here.
// NOTE(review): assumes the demotable tags name register operands that dst
// may share a register with -- confirm against the flag's definition.
// Flags are declared killed.
instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (MulL src1 (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(350);
  format %{ "eimulq $dst, $src1, $src2 \t# long" %}
  ins_encode %{
    __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11658
// Long multiply of a memory operand by an immediate of operand class immL32:
// dst = [src] * imm. Matches MulL(LoadL src, imm) and emits the imulq form
// taking a destination register, a memory source and a constant.
// Flags are declared killed.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11671
// MulHiL: the result operand uses class rdx_RegL and the second input uses
// class rax_RegL. Emits the one-operand imulq on src; the format string
// describes it as writing the RDX:RAX pair. rax is both read and destroyed
// (USE_KILL) and the flags are declared killed.
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11684
// UMulHiL: unsigned counterpart of mulHiL_rReg. Same operand constraints
// (result in class rdx_RegL, second input in class rax_RegL), but emits the
// one-operand mulq on src. rax is USE_KILL and the flags are declared killed.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11697
// Signed int division: rax = rax / div. The divisor is restricted to class
// no_rax_rdx_RegI; rdx and the flags are declared killed.
// The format string spells out the emitted sequence: when rax is 0x80000000
// and div is -1 the idivl is skipped (with rdx zeroed); otherwise cdql is
// followed by idivl. The machine code itself comes from the cdql_enc
// encoding class, which is defined outside this excerpt.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11716
// Signed long division: rax = rax / div. The divisor is restricted to class
// no_rax_rdx_RegL; rdx and the flags are declared killed.
// Per the format string, rdx is first loaded with 0x8000000000000000 and
// compared against rax; when rax matches and div is -1 the idivq is skipped
// (with rdx zeroed), otherwise cdqq is followed by idivq. The machine code
// comes from the cdqq_enc encoding class, defined outside this excerpt.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11736
// Unsigned int division: rax = rax / div (UDivI). The divisor is restricted
// to class no_rax_rdx_RegI; rdx and the flags are declared killed.
// Code is produced by the macro-assembler routine udivI, which receives rax,
// div and rdx in that order.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11749
// Unsigned long division: rax = rax / div (UDivL). The divisor is restricted
// to class no_rax_rdx_RegL; rdx and the flags are declared killed.
// Code is produced by the macro-assembler routine udivL, which receives rax,
// div and rdx in that order.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
    __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11762
// Integer DIVMOD with Register, both quotient and mod results.
// Matches the DivModI node directly (no "Set"). Unlike divI_rReg and
// modI_rReg, neither rax nor rdx is declared killed; only the flags are.
// Uses the same format text and cdql_enc encoding class as divI_rReg.
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl rax, 0x80000000\t# idiv\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
11782
// Long DIVMOD with Register, both quotient and mod results.
// Matches the DivModL node directly (no "Set"). Unlike divL_rReg and
// modL_rReg, neither rax nor rdx is declared killed; only the flags are.
// Uses the same format text and cdqq_enc encoding class as divL_rReg.
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
11803
// Unsigned integer DIVMOD with Register, both quotient and mod results.
// Matches the UDivModI node directly (no "Set"). Needs one scratch register
// (tmp, class no_rax_rdx_RegI, declared TEMP); the flags are declared killed.
// Code is produced by the macro-assembler routine udivmodI, which receives
// rax, div, rdx and tmp in that order.
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
          %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11820
// Unsigned long DIVMOD with Register, both quotient and mod results.
// Matches the UDivModL node directly (no "Set"). Needs one scratch register
// (tmp, class no_rax_rdx_RegL, declared TEMP); the flags are declared killed.
// Code is produced by the macro-assembler routine udivmodL, which receives
// rax, div, rdx and tmp in that order.
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
          %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11837
// Signed int remainder: rdx = rax % div. The divisor is restricted to class
// no_rax_rdx_RegI; rax and the flags are declared killed.
// Uses the same cdql_enc encoding class as divI_rReg; the format string shows
// the same sequence, which bypasses idivl when rax is 0x80000000 and div is
// -1 (leaving rdx zeroed).
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl rax, 0x80000000\t# irem\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpl $div, -1\n\t"
            "je,s done\n"
            "normal: cdql\n\t"
            "idivl $div\n"
            "done:" %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11856
// Signed long remainder: rdx = rax % div. The divisor is restricted to class
// no_rax_rdx_RegL; rax and the flags are declared killed.
// Uses the same cdqq_enc encoding class as divL_rReg; the format string shows
// the same sequence, which bypasses idivq when rax is 0x8000000000000000 and
// div is -1 (leaving rdx zeroed).
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq rax, rdx\n\t"
            "jne,s normal\n\t"
            "xorl rdx, rdx\n\t"
            "cmpq $div, -1\n\t"
            "je,s done\n"
            "normal: cdqq\n\t"
            "idivq $div\n"
            "done:" %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11876
// Unsigned int remainder: rdx = rax % div (UModI). The divisor is restricted
// to class no_rax_rdx_RegI; rax and the flags are declared killed.
// Code is produced by the macro-assembler routine umodI, which receives rax,
// div and rdx in that order.
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11889
// Unsigned long remainder: rdx = rax % div (UModL). The divisor is restricted
// to class no_rax_rdx_RegL; rax and the flags are declared killed.
// Code is produced by the macro-assembler routine umodL, which receives rax,
// div and rdx in that order.
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11902
// Integer Shift Instructions

// Shift Left by one, two, three (shift operand class immI2), in place:
// dst = dst << shift. Selected only when UseAPX is off; the UseAPX case is
// covered by salI_rReg_immI2_ndd. Flags are declared killed.
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11917
// Shift Left by one, two, three (shift operand class immI2), three-operand
// (NDD) form: dst = src << shift. Selected only when UseAPX is on.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11932
// Shift Left by 8-bit immediate, in place: dst = dst << shift.
// Selected only when UseAPX is off; the UseAPX case is covered by
// salI_rReg_imm_ndd. Flags are declared killed.
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
11946
// Shift Left by 8-bit immediate, three-operand (NDD) form:
// dst = src << shift. Selected only when UseAPX is on.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11961
// Shift Left of a memory operand by 8-bit immediate into a register, NDD
// form: dst = [src] << shift. Matches LShiftI(LoadI src, shift) when UseAPX
// is on. Memory itself is not written. Flags are declared killed.
instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esall $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
11974
// Shift Left by 8-bit immediate, read-modify-write on memory:
// [dst] = [dst] << shift. Matches a StoreI to dst whose value is the shifted
// LoadI of the same dst operand. Flags are declared killed.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
11987
// Shift Left by variable, in place: dst = dst << shift.
// Used only when BMI2 is not available (salI_rReg_rReg covers the BMI2
// case). The count operand is constrained to class rcx_RegI and is not
// passed to the assembler call. Flags are declared killed.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12001
// Shift Left by variable, read-modify-write on memory:
// [dst] = [dst] << shift. Used only when BMI2 is not available. The count
// operand is constrained to class rcx_RegI and is not passed to the
// assembler call. Flags are declared killed.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12015
// Shift Left by variable using shlxl: dst = src << shift.
// Requires BMI2. The count may be in any rRegI register, and this rule
// declares no flags operand and no effects.
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12027
// Shift Left of a memory operand by variable using shlxl:
// dst = [src] << shift. Requires BMI2; matches LShiftI(LoadI src, shift)
// with an explicit cost of 175. No flags operand and no effects are declared.
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shlxl $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12039
// Arithmetic Shift Right by 8-bit immediate, in place: dst = dst >> shift.
// Selected only when UseAPX is off; the UseAPX case is covered by
// sarI_rReg_imm_ndd. Flags are declared killed.
// NOTE(review): the pipeline class is ialu_mem_imm although this is a
// register form; the sibling salI/shrI register-immediate rules use ialu_reg.
// Confirm whether this is intentional.
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12053
// Arithmetic Shift Right by 8-bit immediate, three-operand (NDD) form:
// dst = src >> shift. Selected only when UseAPX is on.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
// NOTE(review): the pipeline class is ialu_mem_imm although both data
// operands are registers; the salI/shrI NDD siblings use ialu_reg -- confirm.
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12068
// Arithmetic Shift Right of a memory operand by 8-bit immediate into a
// register, NDD form: dst = [src] >> shift. Matches
// RShiftI(LoadI src, shift) when UseAPX is on. Memory itself is not written.
// Flags are declared killed.
instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "esarl $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{
    __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12081
// Arithmetic Shift Right by 8-bit immediate, read-modify-write on memory:
// [dst] = [dst] >> shift. Matches a StoreI to dst whose value is the shifted
// LoadI of the same dst operand. Flags are declared killed.
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12094
// Arithmetic Shift Right by variable, in place: dst = dst >> shift.
// Used only when BMI2 is not available (sarI_rReg_rReg covers the BMI2
// case). The count operand is constrained to class rcx_RegI and is not
// passed to the assembler call. Flags are declared killed.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12108
// Arithmetic Shift Right by variable, read-modify-write on memory:
// [dst] = [dst] >> shift. Used only when BMI2 is not available. The count
// operand is constrained to class rcx_RegI and is not passed to the
// assembler call. Flags are declared killed.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12122
// Arithmetic Shift Right by variable using sarxl: dst = src >> shift.
// Requires BMI2. The count may be in any rRegI register, and this rule
// declares no flags operand and no effects.
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12134
// Arithmetic Shift Right of a memory operand by variable using sarxl:
// dst = [src] >> shift. Requires BMI2; matches RShiftI(LoadI src, shift)
// with an explicit cost of 175. No flags operand and no effects are declared.
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12146
// Logical Shift Right by 8-bit immediate, in place: dst = dst >>> shift.
// Selected only when UseAPX is off; the UseAPX case is covered by
// shrI_rReg_imm_ndd. Flags are declared killed.
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12160
// Logical Shift Right by 8-bit immediate, three-operand (NDD) form:
// dst = src >>> shift. Selected only when UseAPX is on.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12175
// Logical Shift Right of a memory operand by 8-bit immediate into a
// register, NDD form: dst = [src] >>> shift. Matches
// URShiftI(LoadI src, shift) when UseAPX is on. Memory itself is not
// written. Flags are declared killed.
instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftI (LoadI src) shift));
  effect(KILL cr);

  format %{ "eshrl $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{
    __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12188
// Logical Shift Right by 8-bit immediate, read-modify-write on memory:
// [dst] = [dst] >>> shift. Matches a StoreI to dst whose value is the shifted
// LoadI of the same dst operand. Flags are declared killed.
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12201
// Logical Shift Right by variable, in place: dst = dst >>> shift.
// Used only when BMI2 is not available (shrI_rReg_rReg covers the BMI2
// case). The count operand is constrained to class rcx_RegI and is not
// passed to the assembler call. Flags are declared killed.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12215
// Logical Shift Right by variable, read-modify-write on memory:
// [dst] = [dst] >>> shift. Used only when BMI2 is not available. The count
// operand is constrained to class rcx_RegI and is not passed to the
// assembler call. Flags are declared killed.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12229
// Logical Shift Right by variable using shrxl: dst = src >>> shift.
// Requires BMI2. The count may be in any rRegI register, and this rule
// declares no flags operand and no effects.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12241
// Logical Shift Right of a memory operand by variable using shrxl:
// dst = [src] >>> shift. Requires BMI2; matches URShiftI(LoadI src, shift)
// with an explicit cost of 175. No flags operand and no effects are declared.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12253
// Long Shift Instructions

// Shift Left by one, two, three (shift operand class immI2), in place:
// dst = dst << shift on a long register. Selected only when UseAPX is off;
// the UseAPX case is covered by salL_rReg_immI2_ndd. Flags are declared
// killed.
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12268
// Shift Left by one, two, three (shift operand class immI2), three-operand
// (NDD) form on long registers: dst = src << shift. Selected only when
// UseAPX is on. Operand 1 is tagged as NDD-demotable. Flags are declared
// killed.
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12283
// Shift Left by 8-bit immediate, in place: dst = dst << shift on a long
// register. Selected only when UseAPX is off; the UseAPX case is covered by
// salL_rReg_imm_ndd. Flags are declared killed.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12297
// Shift Left by 8-bit immediate, three-operand (NDD) form on long registers:
// dst = src << shift. Selected only when UseAPX is on.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12312
// Shift Left of a long memory operand by 8-bit immediate into a register,
// NDD form: dst = [src] << shift. Matches LShiftL(LoadL src, shift) when
// UseAPX is on. Memory itself is not written. Flags are declared killed.
instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (LShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esalq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12325
// Shift Left by 8-bit immediate, read-modify-write on a long in memory:
// [dst] = [dst] << shift. Matches a StoreL to dst whose value is the shifted
// LoadL of the same dst operand. Flags are declared killed.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12338
// Shift Left by variable, in place: dst = dst << shift on a long register.
// Used only when BMI2 is not available (salL_rReg_rReg covers the BMI2
// case). The count is an int operand constrained to class rcx_RegI and is
// not passed to the assembler call. Flags are declared killed.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12352
// Shift Left by variable, read-modify-write on a long in memory:
// [dst] = [dst] << shift. Used only when BMI2 is not available. The count
// operand is constrained to class rcx_RegI and is not passed to the
// assembler call. Flags are declared killed.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12366
// Shift Left of a long by variable using shlxq: dst = src << shift.
// Requires BMI2. The count is an int in any rRegI register, and this rule
// declares no flags operand and no effects.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12378
// Shift Left of a long memory operand by variable using shlxq:
// dst = [src] << shift. Requires BMI2; matches LShiftL(LoadL src, shift)
// with an explicit cost of 175. No flags operand and no effects are declared.
instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12390
// Arithmetic Shift Right of a long by immediate, in place:
// dst = dst >> shift. Unlike the other immediate shift rules, the count uses
// operand class immI (not immI8); the encoder masks it to its low six bits
// (& 0x3F) before handing it to the assembler.
// Selected only when UseAPX is off. Flags are declared killed.
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}
12404
// Arithmetic Shift Right of a long by immediate, three-operand (NDD) form:
// dst = src >> shift. The count uses operand class immI and is masked to its
// low six bits (& 0x3F) by the encoder. Selected only when UseAPX is on.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12419
// Arithmetic Shift Right of a long memory operand by immediate into a
// register, NDD form: dst = [src] >> shift. Matches
// RShiftL(LoadL src, shift) when UseAPX is on. The count uses operand class
// immI and is masked to its low six bits (& 0x3F) by the encoder.
// Flags are declared killed.
instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (RShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "esarq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
  %}
  ins_pipe(ialu_mem_imm);
%}
12432
// Arithmetic Shift Right by immediate, read-modify-write on a long in
// memory: [dst] = [dst] >> shift. Matches a StoreL to dst whose value is the
// shifted LoadL of the same dst operand. The count uses operand class immI
// and is masked to its low six bits (& 0x3F) by the encoder.
// Flags are declared killed.
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}
12445
// Arithmetic Shift Right by variable, in place: dst = dst >> shift on a long
// register. Used only when BMI2 is not available (sarL_rReg_rReg covers the
// BMI2 case). The count operand is constrained to class rcx_RegI and is not
// passed to the assembler call. Flags are declared killed.
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12459
// Arithmetic Shift Right by variable, read-modify-write on a long in memory:
// [dst] = [dst] >> shift. Used only when BMI2 is not available. The count
// operand is constrained to class rcx_RegI and is not passed to the
// assembler call. Flags are declared killed.
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12473
// Arithmetic Shift Right of a long by variable using sarxq:
// dst = src >> shift. Requires BMI2. The count is an int in any rRegI
// register, and this rule declares no flags operand and no effects.
instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12485
// Arithmetic Shift Right of a long memory operand by variable using sarxq:
// dst = [src] >> shift. Requires BMI2; matches RShiftL(LoadL src, shift)
// with an explicit cost of 175. No flags operand and no effects are declared.
instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12497
// Logical Shift Right by 8-bit immediate, in place: dst = dst >>> shift on a
// long register. Selected only when UseAPX is off; the UseAPX case is
// covered by shrL_rReg_imm_ndd. Flags are declared killed.
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12511
// Logical Shift Right by 8-bit immediate, three-operand (NDD) form on long
// registers: dst = src >>> shift. Selected only when UseAPX is on.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12526
// Logical Shift Right of a long memory operand by 8-bit immediate into a
// register, NDD form: dst = [src] >>> shift. Matches
// URShiftL(LoadL src, shift) when UseAPX is on. Memory itself is not
// written. Flags are declared killed.
instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (URShiftL (LoadL src) shift));
  effect(KILL cr);

  format %{ "eshrq $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{
    __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12539
// Logical Shift Right by 8-bit immediate, read-modify-write on a long in
// memory: [dst] = [dst] >>> shift. Matches a StoreL to dst whose value is
// the shifted LoadL of the same dst operand. Flags are declared killed.
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
12552
// Logical Shift Right by variable, in place: dst = dst >>> shift on a long
// register. Used only when BMI2 is not available (shrL_rReg_rReg covers the
// BMI2 case). The count operand is constrained to class rcx_RegI and is not
// passed to the assembler call. Flags are declared killed.
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12566
// Logical Shift Right by variable, read-modify-write on a long in memory:
// [dst] = [dst] >>> shift. Used only when BMI2 is not available. The count
// operand is constrained to class rcx_RegI and is not passed to the
// assembler call. Flags are declared killed.
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}
12580
// Logical Shift Right of a long by variable using shrxq:
// dst = src >>> shift. Requires BMI2. The count is an int in any rRegI
// register, and this rule declares no flags operand and no effects.
instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12592
// Logical Shift Right of a long memory operand by variable using shrxq:
// dst = [src] >>> shift. Requires BMI2; matches URShiftL(LoadL src, shift)
// with an explicit cost of 175. No flags operand and no effects are declared.
instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}
12604
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
// The whole shift pair is replaced by a single movsbl from src to dst; no
// flags operand is declared.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12617
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
// The whole shift pair is replaced by a single movswl from src to dst; no
// flags operand is declared.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12630
12631 // ROL/ROR instructions
12632
// Rotate left by constant, in place: dst = rotl(dst, shift).
// Applies to int-typed RotateLeft nodes when BMI2 is not available; emits
// roll with an immediate count. Flags are declared killed.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12645
// Rotate left by constant into a separate destination:
// dst = rotl(src, shift). Applies to int-typed RotateLeft nodes when BMI2 is
// available and UseAPX is off. No flags operand is declared.
// The left rotate is emitted as a right rotate (rorxl) by
// 32 - (shift & 31).
// NOTE(review): when (shift & 31) is 0 the count passed to rorxl is 32;
// confirm the assembler/hardware treat that as a rotate by 0.
instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}
12657
// Rotate left of an int memory operand by constant into a register:
// dst = rotl([src], shift). Applies to int-typed RotateLeft(LoadI src, shift)
// nodes when BMI2 is available (no UseAPX condition), with an explicit cost
// of 175. No flags operand is declared.
// The left rotate is emitted as a right rotate (rorxl) by
// 32 - (shift & 31).
// NOTE(review): when (shift & 31) is 0 the count passed to rorxl is 32;
// confirm the assembler/hardware treat that as a rotate by 0.
instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}
12670
// Rotate Left by variable, in place: dst = rotl(dst, shift).
// Applies to int-typed RotateLeft nodes when UseAPX is off; the UseAPX case
// is covered by rolI_rReg_Var_ndd. The count operand is constrained to class
// rcx_RegI and is not passed to the assembler call. Flags are declared
// killed.
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12683
// Rotate Left by variable, three-operand (NDD) form: dst = rotl(src, shift).
// Applies to int-typed RotateLeft nodes when UseAPX is on. The count operand
// is constrained to class rcx_RegI and is not passed to the assembler call.
// Operand 1 is tagged as NDD-demotable. Flags are declared killed.
instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eroll $dst, $src, $shift\t# rotate left (int ndd)" %}
  ins_encode %{
    __ eroll($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12698
// Rotate Right by constant, in place: dst = rotr(dst, shift).
// Applies to int-typed RotateRight nodes when BMI2 is not available; emits
// rorl with an immediate count. Flags are declared killed.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12711
// Rotate Right (int) by constant, BMI2 without APX: three-operand rorxl that
// writes $dst from $src. No flags operand is declared for this form.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift) %{
  match(Set dst (RotateRight src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);

  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12723
// Rotate Right (int) of a loaded value by constant, BMI2: the LoadI is folded
// into rorxl's memory operand.
instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift) %{
  match(Set dst (RotateRight (LoadI src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  ins_cost(175);

  format %{ "rorxl $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12735
// Rotate Right (int) by a variable count, non-APX: two-operand in-place form.
// The count operand is of class rcx_RegI; the emitted rorl names only $dst.
// Condition flags are declared killed.
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);

  format %{ "rorl $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12748
// Rotate Right (int) by a variable count, APX form with a separate destination
// (NDD): $dst = rotr($src, $shift). The count operand is of class rcx_RegI.
// Operand 1 ($src) is flagged as an NDD demotion candidate.
instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorl $dst, $src, $shift\t# rotate right(int ndd)" %}
  ins_encode %{
    // Trailing 'false' is presumably the no_flags selector -- TODO confirm.
    __ erorl($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12763
// Rotate Left (long) by constant when BMI2 is not available: two-operand
// in-place rolq with an immediate count. Condition flags are declared killed.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12776
// Rotate Left (long) by constant, BMI2 without APX: three-operand form that
// writes $dst from $src. The left rotate by k is emitted as a right rotate
// (rorxq) by 64 - (k & 63). No flags operand is declared for this form.
instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift) %{
  match(Set dst (RotateLeft src shift));
  predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // Complementary right-rotate count for the requested left rotate.
    int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, ror_count);
  %}
  ins_pipe(ialu_reg_reg);
%}
12788
// Rotate Left (long) of a loaded value by constant, BMI2: the LoadL is folded
// into rorxq's memory operand and the count is converted to the equivalent
// right-rotate count 64 - (k & 63).
instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift) %{
  match(Set dst (RotateLeft (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);

  format %{ "rolxq $dst, $src, $shift" %}
  ins_encode %{
    // Complementary right-rotate count for the requested left rotate.
    int ror_count = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, ror_count);
  %}
  ins_pipe(ialu_reg_mem);
%}
12801
// Rotate Left (long) by a variable count, non-APX: two-operand in-place form.
// The count operand is of class rcx_RegI; the emitted rolq names only $dst.
// Condition flags are declared killed.
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rolq $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12815
// Rotate Left (long) by a variable count, APX form with a separate destination
// (NDD): $dst = rotl($src, $shift). The count operand is of class rcx_RegI.
// Operand 1 ($src) is flagged as an NDD demotion candidate.
instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateLeft src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erolq $dst, $src, $shift\t# rotate left(long ndd)" %}
  ins_encode %{
    // Trailing 'false' is presumably the no_flags selector -- TODO confirm.
    __ erolq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12830
// Rotate Right (long) by constant when BMI2 is not available: two-operand
// in-place rorq with an immediate count. Condition flags are declared killed.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (RotateRight dst shift));
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}
12843
// Rotate Right (long) by constant, BMI2: three-operand rorxq that writes $dst
// from $src. No flags operand is declared for this form.
// NOTE(review): unlike rorI_immI8, rolI_immI8 and rolL_immI8, this predicate
// does not test !UseAPX -- confirm whether the difference is intentional.
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift) %{
  match(Set dst (RotateRight src shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);

  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
12855
// Rotate Right (long) of a loaded value by constant, BMI2: the LoadL is folded
// into rorxq's memory operand.
instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift) %{
  match(Set dst (RotateRight (LoadL src) shift));
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  ins_cost(175);

  format %{ "rorxq $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
12867
// Rotate Right (long) by a variable count, non-APX: two-operand in-place form.
// The count operand is of class rcx_RegI; the emitted rorq names only $dst.
// Condition flags are declared killed.
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight dst shift));
  predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);

  format %{ "rorq $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12880
// Rotate Right (long) by a variable count, APX form with a separate
// destination (NDD): $dst = rotr($src, $shift). The count operand is of class
// rcx_RegI. Operand 1 ($src) is flagged as an NDD demotion candidate.
instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr) %{
  match(Set dst (RotateRight src shift));
  predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "erorq $dst, $src, $shift\t# rotate right(long ndd)" %}
  ins_encode %{
    // Trailing 'false' is presumably the no_flags selector -- TODO confirm.
    __ erorq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12895
12896 //----------------------------- CompressBits/ExpandBits ------------------------
12897
// CompressBits on a long value with the mask in a register: emitted as pextq.
instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask)
%{
  match(Set dst (CompressBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe(pipe_slow);
%}
12907
// ExpandBits on a long value with the mask in a register: emitted as pdepq.
instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask)
%{
  match(Set dst (ExpandBits src mask));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe(pipe_slow);
%}
12917
// CompressBits on a long value with the mask loaded from memory: the LoadL is
// folded into pextq's memory operand.
instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask)
%{
  match(Set dst (CompressBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pextq $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe(pipe_slow);
%}
12927
// ExpandBits on a long value with the mask loaded from memory: the LoadL is
// folded into pdepq's memory operand.
instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask)
%{
  match(Set dst (ExpandBits src (LoadL mask)));
  predicate(n->bottom_type()->isa_long());

  format %{ "pdepq $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe(pipe_slow);
%}
12937
12938
12939 // Logical Instructions
12940
12941 // Integer Logical Instructions
12942
12943 // And Instructions
// And Register with Register (int), non-APX: two-operand in-place andl.
// The flag() list records the condition-flag effects declared for this node.
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
12958
// And Register with Register (int) using New Data Destination (NDD), APX only:
// $dst = $src1 & $src2. Both source operands are flagged as NDD demotion
// candidates.
instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    // Trailing 'false' is presumably the no_flags selector -- TODO confirm.
    __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
12974
// And Register with Immediate 255 (int): the mask is not encoded; the node is
// emitted as a byte zero-extending move (movzbl). No flags operand is declared.
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask) %{
  match(Set dst (AndI src mask));

  format %{ "movzbl $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12986
// And Register with Immediate 255, then ConvI2L: the whole subtree is emitted
// as a single movzbl into the long destination register.
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
12998
// And Register with Immediate 65535 (int): the mask is not encoded; the node
// is emitted as a 16-bit zero-extending move (movzwl). No flags operand is
// declared.
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask) %{
  match(Set dst (AndI src mask));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13010
// And Register with Immediate 65535, then ConvI2L: the whole subtree is
// emitted as a single movzwl into the long destination register.
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13022
// ConvI2L of (AndI src mask) where mask is an immI_Pow2M1 operand, BMI2 only:
// the int-to-long conversion is folded away. The bit count
// exact_log2(mask + 1) is loaded into $tmp, and bzhiq then produces $dst from
// $src using that count.
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src, immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI src mask)));
  predicate(VM_Version::supports_bmi2());
  effect(TEMP tmp, KILL cr);
  ins_cost(125);

  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int & immI_Pow2M1 -> long" %}
  ins_encode %{
    // Number of low bits to keep, materialized in the temp register.
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13037
// And Register with Immediate (int), non-APX: two-operand in-place andl with
// an immediate source. Condition flags are declared killed.
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (AndI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13052
// And Register with Immediate (int) using New Data Destination (NDD), APX
// only: $dst = $src1 & imm. Operand 1 ($src1) is flagged as an NDD demotion
// candidate.
instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13066
// And of a loaded int with an Immediate into a register, NDD form (APX only):
// $dst = [$src1] & imm. The LoadI is folded into eandl's memory operand.
instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  match(Set dst (AndI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13080
// And Register with Memory (int), non-APX: two-operand in-place andl with the
// LoadI folded into the memory source operand.
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13096
// And Register with Memory (int) using New Data Destination (NDD), APX only:
// $dst = $src1 & [$src2]. The LoadI is folded into eandl's memory operand.
// NOTE(review): Flag_ndd_demotable_opr2 is set although operand 2 is a memory
// operand -- confirm that flagging opr2 is intended here.
instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LoadI src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  ins_cost(150);

  format %{ "eandl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13111
// And Memory with Register (byte): matches a StoreB of (LoadB dst) & src back
// to the same address and emits a single andb on the memory operand.
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andb $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13126
// And Memory with Register (int): matches a StoreI of (LoadI dst) & src back
// to the same address and emits a single andl on the memory operand.
instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13140
// And Memory with Immediate (int): matches a StoreI of (LoadI dst) & imm back
// to the same address and emits a single andl on the memory operand.
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andl $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13155
13156 // BMI1 instructions
// And-Not (int) with a memory operand: matches (src1 ^ -1) & [src2] and emits
// andnl. Requires both BMI1 and AVX per the predicate.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13171
// And-Not (int), register form: matches (src1 ^ -1) & src2 and emits andnl.
// Requires both BMI1 and AVX per the predicate.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (XorI src1 minus_1) src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnl $dst, $src1, $src2" %}
  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13185
// BLSI (int), register form: matches (0 - src) & src and emits blsil.
// Requires both BMI1 and AVX per the predicate.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13199
// BLSI (int), memory form: matches (0 - [src]) & [src] and emits blsil with a
// memory source. Requires both BMI1 and AVX per the predicate.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsil $dst, $src" %}
  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13214
// BLSMSK (int), memory form: matches ([src] + -1) ^ [src] and emits blsmskl
// with a memory source. Requires both BMI1 and AVX per the predicate.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
  ins_cost(125);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13230
// BLSMSK (int), register form: matches (src + -1) ^ src and emits blsmskl.
// Requires both BMI1 and AVX per the predicate.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorI (AddI src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskl $dst, $src" %}
  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13246
// BLSR (int), register form: matches (src + -1) & src and emits blsrl.
// Requires both BMI1 and AVX per the predicate.
// Uses the register-only pipeline class ialu_reg, in line with the other
// register-source BMI1 forms (blsiI_rReg_rReg, blsmskI_rReg_rReg); this
// instruction has no memory operand, so ialu_reg_mem did not apply.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
13262
// BLSR (int), memory form: matches ([src] + -1) & [src] and emits blsrl with
// a memory source. Requires both BMI1 and AVX per the predicate.
// Uses the register/memory pipeline class ialu_reg_mem, in line with the
// other memory-source BMI1 forms (blsiI_rReg_mem, blsmskI_rReg_mem); the
// source operand is a memory reference, so ialu_reg did not describe it.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrl $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
13279
13280 // Or Instructions
// Or Register with Register (int), non-APX: two-operand in-place orl.
// Condition flags are declared killed.
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13295
// Or Register with Register (int) using New Data Destination (NDD), APX only:
// $dst = $src1 | $src2. Both source operands are flagged as NDD demotion
// candidates.
instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    // Trailing 'false' is presumably the no_flags selector -- TODO confirm.
    __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13310
// Or Register with Immediate (int), non-APX: two-operand in-place orl with an
// immediate source. Condition flags are declared killed.
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (OrI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13325
// Or Register with Immediate (int) using New Data Destination (NDD), APX only:
// $dst = $src1 | imm. Operand 1 ($src1) is flagged as an NDD demotion
// candidate.
instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13339
// Or Immediate with Register (int) using New Data Destination (NDD), APX only.
// Same operation as orI_rReg_rReg_imm_ndd but with the immediate as the first
// match operand; the encoding therefore passes $src2 (the register) first and
// $src1 (the constant) second.
// The NDD demotion candidate is operand 2 ($src2): it is the only register
// source, whereas operand 1 is an immediate and cannot share a register with
// $dst.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorl $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13353
// Or of a loaded int with an Immediate into a register, NDD form (APX only):
// $dst = [$src1] | imm. The LoadI is folded into eorl's memory operand.
instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  match(Set dst (OrI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13367
// Or Register with Memory (int), non-APX: two-operand in-place orl with the
// LoadI folded into the memory source operand.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13383
// Or Register with Memory (int) using New Data Destination (NDD), APX only:
// $dst = $src1 | [$src2]. The LoadI is folded into eorl's memory operand.
// NOTE(review): Flag_ndd_demotable_opr2 is set although operand 2 is a memory
// operand -- confirm that flagging opr2 is intended here.
instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LoadI src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  ins_cost(150);

  format %{ "eorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13398
// Or Memory with Register (byte): matches a StoreB of (LoadB dst) | src back
// to the same address and emits a single orb on the memory operand.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orb $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13413
// Or Memory with Register (int): matches a StoreI of (LoadI dst) | src back to
// the same address and emits a single orl on the memory operand.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13427
// Or Memory with Immediate (int): matches a StoreI of (LoadI dst) | imm back
// to the same address and emits a single orl on the memory operand.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(125);

  format %{ "orl $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13442
13443 // Xor Instructions
// Xor Register with Register (int), non-APX: two-operand in-place xorl.
// Condition flags are declared killed.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
13458
// Xor Register with Register (int) using New Data Destination (NDD), APX only:
// $dst = $src1 ^ $src2. Both source operands are flagged as NDD demotion
// candidates.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    // Trailing 'false' is presumably the no_flags selector -- TODO confirm.
    __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13473
// Xor Register with Immediate -1 (int), non-APX: emitted as an in-place notl.
// No flags operand is declared for this form.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13486
// Xor Register with Immediate -1 (int) using New Data Destination (NDD), APX
// only: emitted as enotl $dst, $src. Operand 1 ($src) is flagged as an NDD
// demotion candidate. No flags operand is declared for this form.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm) %{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl $dst, $src" %}
  ins_encode %{
    __ enotl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
13499
// Xor Register with Immediate (int), non-APX: two-operand in-place xorl with
// an immediate source.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr) %{
  match(Set dst (XorI dst src));
  // The predicate rejects an immediate of -1 outright, so that the choice of
  // xorI_rReg_im1 for that constant does not depend on instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13515
// Xor Register with Immediate (int) using New Data Destination (NDD), APX
// only: $dst = $src1 ^ imm. Operand 1 ($src1) is flagged as an NDD demotion
// candidate.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 src2));
  // The predicate rejects an immediate of -1 outright, so that the choice of
  // xorI_rReg_im1_ndd for that constant does not depend on instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13530
// Xor of a loaded int with an Immediate into a register, NDD form (APX only):
// $dst = [$src1] ^ imm. The result goes to $dst; memory is only read. The
// LoadI is folded into exorl's memory operand.
instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr) %{
  match(Set dst (XorI (LoadI src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13546
// Xor Register with Memory (int), non-APX: two-operand in-place xorl with the
// LoadI folded into the memory source operand.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13562
// Xor Register with Memory (int) using New Data Destination (NDD), APX only:
// $dst = $src1 ^ [$src2]. The LoadI is folded into exorl's memory operand.
// NOTE(review): Flag_ndd_demotable_opr2 is set although operand 2 is a memory
// operand -- confirm that flagging opr2 is intended here.
instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LoadI src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
  ins_cost(150);

  format %{ "exorl $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13577
// Xor Memory with Register (byte): matches a StoreB of (LoadB dst) ^ src back
// to the same address and emits a single xorb on the memory operand.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorb $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13592
// Xor Memory with Register (int): matches a StoreI of (LoadI dst) ^ src back
// to the same address and emits a single xorl on the memory operand.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
  ins_cost(150);

  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
13606
// Int read-modify-write xor of a memory location with an immediate:
// [dst] ^= src. The flags register is killed.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorl $dst, $src\t# int" %}
  ins_encode %{
    Address dst_addr = $dst$$Address;
    __ xorl(dst_addr, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13621
13622
13623 // Long Logical Instructions
13624
13625 // And Instructions
// Long and of two registers, two-operand form: dst &= src.
// Selected only when UseAPX is off; the flags register is killed.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ andq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13640
// Long and of two registers using the APX New Data Destination (NDD) form:
// dst = src1 & src2. Selected only when UseAPX is on; the flags register is killed.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eandq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13656
// Long and with the constant 255, emitted as a byte zero-extension (movzbl).
// The rule declares no effect on the flags register.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzbl $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // The 32-bit movzbl already clears the upper 32 bits, so REX.W is not needed.
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movzbl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13669
// Long and with the constant 65535, emitted as a 16-bit zero-extension (movzwl).
// The rule declares no effect on the flags register.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask) %{
  match(Set dst (AndL src mask));

  format %{ "movzwl $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // The 32-bit movzwl already clears the upper 32 bits, so REX.W is not needed.
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movzwl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13682
// Long and of a register with an immL32 constant, two-operand form: dst &= src.
// Selected only when UseAPX is off; the flags register is killed.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ andq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13697
// Long and of a register with an immL32 constant, NDD form used when UseAPX
// is on: dst = src1 & src2. The flags register is killed.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ eandq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13711
// Long and of a value loaded from memory with an immL32 constant, NDD form
// used when UseAPX is on: dst = [src1] & src2. The flags register is killed.
instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (AndL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  lhs_addr = $src1$$Address;
    __ eandq(dst_reg, lhs_addr, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13725
// Long and of a register with a value loaded from memory, two-operand form:
// dst &= [src]. Selected only when UseAPX is off; the flags register is killed.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ andq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
13741
// Long and with a memory operand, NDD form used when UseAPX is on:
// dst = src1 & [src2]. The flags register is killed.
instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eandq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Register lhs_reg  = $src1$$Register;
    Address  rhs_addr = $src2$$Address;
    __ eandq(dst_reg, lhs_reg, rhs_addr, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
13756
// Long read-modify-write and: the LoadL / AndL / StoreL chain on the same
// address collapses into a single andq on memory. The flags register is killed.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Address  dst_addr = $dst$$Address;
    Register src_reg  = $src$$Register;
    __ andq(dst_addr, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
13771
// Long read-modify-write and of a memory location with an immL32 constant:
// [dst] &= src. The flags register is killed.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andq $dst, $src\t# long" %}
  ins_encode %{
    Address dst_addr = $dst$$Address;
    __ andq(dst_addr, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13786
// Clears one bit of a long in memory with btrq when the and-mask is the
// complement of a power of two. The flags register is killed.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));

  // Only take masks whose cleared bit lies above bit 30, i.e. true 64-bit
  // immediates; plain AND/OR already handles the 8/32-bit cases well enough.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq $dst, log2(not($con))\t# long" %}
  ins_encode %{
    // Index of the bit to clear: the set bit of the complemented mask.
    const int bit_index = log2i_exact((julong)~$con$$constant);
    __ btrq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13803
13804 // BMI1 instructions
// And-not with a memory operand: (src1 ^ -1) & [src2] becomes one andnq.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "andnq $dst, $src1, $src2" %}
  ins_encode %{
    Register dst_reg      = $dst$$Register;
    Register inverted_reg = $src1$$Register;
    Address  other_addr   = $src2$$Address;
    __ andnq(dst_reg, inverted_reg, other_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
13819
// And-not of two registers: (src1 ^ -1) & src2 becomes one andnq.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // No memory operand here, so use the register-register pipeline class, as
  // the other three-register rules in this file do (e.g. andL_rReg_ndd),
  // rather than the reg-mem class.
  ins_pipe(ialu_reg_reg);
%}
13833
// Isolate lowest set bit: (0 - src) & src becomes one blsiq.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ blsiq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13847
// Isolate lowest set bit of a long loaded from memory:
// (0 - [src]) & [src] becomes one blsiq with a memory operand.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsiq $dst, $src" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ blsiq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
13862
// Mask up to lowest set bit of a long loaded from memory:
// ([src] + -1) ^ [src] becomes one blsmskq with a memory operand.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ blsmskq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
13878
// Mask up to lowest set bit: (src + -1) ^ src becomes one blsmskq.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ blsmskq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13894
// Reset lowest set bit: (src + -1) & src becomes one blsrq.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ blsrq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
13910
// Reset lowest set bit of a long loaded from memory:
// ([src] + -1) & [src] becomes one blsrq with a memory operand.
// Requires both BMI1 and AVX support; the flags register is killed.
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // This form reads memory, so it uses the reg-mem pipeline class, matching
  // the other BMI1 memory forms (blsiL_rReg_mem, blsmskL_rReg_mem).
  ins_pipe(ialu_reg_mem);
%}
13927
13928 // Or Instructions
// Long or of two registers, two-operand form: dst |= src.
// Selected only when UseAPX is off; the flags register is killed.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ orq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13943
// Long or of two registers using the APX New Data Destination (NDD) form:
// dst = src1 | src2. Selected only when UseAPX is on; the flags register is killed.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ eorq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13959
// Long or of a register with a pointer reinterpreted as a long (CastP2X),
// two-operand form, selected only when UseAPX is off.
// The source is any_RegP so that R15 (the TLS register) matches without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register ptr_reg = $src$$Register;
    __ orq(dst_reg, ptr_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
13973
// NDD (UseAPX) variant of orL_rReg_castP2X: dst = src1 | (long)src2.
// src1 is the long input of the OrL, so it is a long register operand; only
// src2, the pointer under CastP2X, uses any_RegP. With src1 declared as a
// pointer operand the rule could not be selected for a long-typed input.
instruct orL_rReg_castP2X_ndd(rRegL dst, rRegL src1, any_RegP src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (OrL src1 (CastP2X src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13986
// Long or of a register with an immL32 constant, two-operand form: dst |= src.
// Selected only when UseAPX is off; the flags register is killed.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ orq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14001
// Long or of a register with an immL32 constant, NDD form used when UseAPX
// is on: dst = src1 | src2. The flags register is killed.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ eorq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14015
// Long or with the immL32 constant as the FIRST ideal input, NDD form used
// when UseAPX is on: dst = src2 | src1. The operands are swapped when emitting
// so the register comes first and the constant second.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (OrL src1 src2));
  effect(KILL cr);
  // Operand 1 is the immediate here; the only register source is operand 2,
  // so the ndd-demotable marker must name opr2 (the sibling rule
  // orL_rReg_rReg_imm_ndd marks opr1 because its register source is src1).
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14029
// Long or of a value loaded from memory with an immL32 constant, NDD form
// used when UseAPX is on: dst = [src1] | src2. The result goes to a register,
// not back to memory; the flags register is killed.
instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (OrL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  lhs_addr = $src1$$Address;
    __ eorq(dst_reg, lhs_addr, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14044
// Long or of a register with a value loaded from memory, two-operand form:
// dst |= [src]. Selected only when UseAPX is off; the flags register is killed.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ orq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
14060
// Long or with a memory operand, NDD form used when UseAPX is on:
// dst = src1 | [src2]. The flags register is killed.
instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Register lhs_reg  = $src1$$Register;
    Address  rhs_addr = $src2$$Address;
    __ eorq(dst_reg, lhs_reg, rhs_addr, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14075
// Long read-modify-write or: the LoadL / OrL / StoreL chain on the same
// address collapses into a single orq on memory. The flags register is killed.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Address  dst_addr = $dst$$Address;
    Register src_reg  = $src$$Register;
    __ orq(dst_addr, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
14090
// Long read-modify-write or of a memory location with an immL32 constant:
// [dst] |= src. The flags register is killed.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "orq $dst, $src\t# long" %}
  ins_encode %{
    Address dst_addr = $dst$$Address;
    __ orq(dst_addr, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14105
// Sets one bit of a long in memory with btsq when the or-mask is a power of
// two. The flags register is killed.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr) %{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));

  // Only take masks whose set bit lies above bit 31, i.e. true 64-bit
  // immediates; plain AND/OR already handles the 8/32-bit cases well enough.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq $dst, log2($con)\t# long" %}
  ins_encode %{
    // Index of the bit to set: the single set bit of the mask.
    const int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
14122
14123 // Xor Instructions
// Long xor of two registers, two-operand form: dst ^= src.
// Selected only when UseAPX is off; the flags register is killed.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ xorq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
14138
// Long xor of two registers using the APX New Data Destination (NDD) form:
// dst = src1 ^ src2. Selected only when UseAPX is on; the flags register is killed.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    Register rhs_reg = $src2$$Register;
    __ exorq(dst_reg, lhs_reg, rhs_reg, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
14153
// Long xor with the constant -1 is a bitwise complement, emitted as notq.
// Selected only when UseAPX is off; the rule declares no flags-register effect.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  predicate(!UseAPX);

  format %{ "notq $dst" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ notq(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
14166
// Long xor with the constant -1, NDD form used when UseAPX is on:
// dst = ~src, emitted as enotq. The rule declares no flags-register effect.
instruct xorL_rReg_im1_ndd(rRegL dst, rRegL src, immL_M1 imm) %{
  match(Set dst (XorL src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ enotq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg);
%}
14179
// Long xor of a register with an immL32 constant, two-operand form: dst ^= src.
// The flags register is killed.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (XorL dst src));
  // Besides requiring UseAPX to be off, the constant -1 is excluded outright so
  // that choosing xorL_rReg_im1 for it never depends on instruction costs.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ xorq(dst_reg, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
14195
// Long xor of a register with an immL32 constant, NDD form:
// dst = src1 ^ src2. The flags register is killed.
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 src2));
  // Besides requiring UseAPX, the constant -1 is excluded outright so that
  // choosing xorL_rReg_im1_ndd for it never depends on instruction costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register lhs_reg = $src1$$Register;
    __ exorq(dst_reg, lhs_reg, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14210
// Long xor of a value loaded from memory with an immL32 constant, NDD form
// used when UseAPX is on: dst = [src1] ^ src2. The result goes to a register,
// not back to memory; the flags register is killed.
instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr) %{
  match(Set dst (XorL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  lhs_addr = $src1$$Address;
    __ exorq(dst_reg, lhs_addr, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
14226
// Long xor of a register with a value loaded from memory, two-operand form:
// dst ^= [src]. Selected only when UseAPX is off; the flags register is killed.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Address  src_addr = $src$$Address;
    __ xorq(dst_reg, src_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
14242
// Long xor with a memory operand, NDD form used when UseAPX is on:
// dst = src1 ^ [src2]. The flags register is killed.
instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "exorq $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register dst_reg  = $dst$$Register;
    Register lhs_reg  = $src1$$Register;
    Address  rhs_addr = $src2$$Address;
    __ exorq(dst_reg, lhs_reg, rhs_addr, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
14257
// Long read-modify-write xor: the LoadL / XorL / StoreL chain on the same
// address collapses into a single xorq on memory. The flags register is killed.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(150);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Address  dst_addr = $dst$$Address;
    Register src_reg  = $src$$Register;
    __ xorq(dst_addr, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
14272
// Long read-modify-write xor of a memory location with an immL32 constant:
// [dst] ^= src. The flags register is killed.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  ins_cost(125);
  format %{ "xorq $dst, $src\t# long" %}
  ins_encode %{
    Address dst_addr = $dst$$Address;
    __ xorq(dst_addr, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
14287
// CmpLTMask of two int registers: compare p with q, materialize the signed
// "less" condition into dst with setcc, then negate dst. The flags register
// is killed.
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  // Each listed instruction ends with "\n\t" so the setcc and negl lines do
  // not run together in the printed listing.
  format %{ "cmpl $p, $q\t# cmpLTMask\n\t"
            "setcc $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setcc(Assembler::less, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14304
// CmpLTMask against the constant zero: a single arithmetic right shift of
// dst by 31. The flags register is killed.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ sarl(dst_reg, 31);
  %}
  ins_pipe(ialu_reg);
%}
14317
// Conditional add fused with CmpLTMask: p = (p - q) + ((p < q) ? y : 0).
// Emitted with a short branch instead of a mask; per the original note it is
// better to save a register than to avoid a branch. The flags register is killed.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);

  ins_cost(300);
  format %{ "subl $p,$q\t# cadd_cmpLTMask\n\t"
            "jge done\n\t"
            "addl $p,$y\n"
            "done: " %}
  ins_encode %{
    Label skip_add;
    __ subl($p$$Register, $q$$Register);
    // Skip the add when the subtraction left a greater-or-equal condition.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
14340
// And fused with CmpLTMask: y = (p < q) ? y : 0.
// Emitted with a short branch instead of a mask; per the original note it is
// better to save a register than to avoid a branch. The flags register is killed.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);
  format %{ "cmpl $p, $q\t# and_cmpLTMask\n\t"
            "jlt done\n\t"
            "xorl $y, $y\n"
            "done: " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    // y survives only when the signed "less" condition holds; otherwise zero it.
    __ jccb(Assembler::less, keep_y);
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
14365
14366
14367 //---------- FP Instructions------------------------------------------------
14368
// Float compare of two registers producing flags in rFlagsRegU.
// Costly (ins_cost 500): ucomiss is followed by the emit_cmpfp_fixup sequence,
// which the format string shows as a pushfq/andq/popfq patch for the NaN case.
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14387
// Float compare of two registers producing flags in rFlagsRegUCF:
// a bare ucomiss with no fixup sequence.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14398
// Float compare of two registers producing flags in rFlagsRegUCFE,
// emitted as evucomxss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
14409
// Float compare of a register with a float loaded from memory, producing
// flags in rFlagsRegUCF: ucomiss with a memory operand.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ ucomiss(lhs, rhs_addr);
  %}
  ins_pipe(pipe_slow);
%}
14420
// Float compare of a register with a float loaded from memory, producing
// flags in rFlagsRegUCFE: evucomxss with a memory operand.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ evucomxss(lhs, rhs_addr);
  %}
  ins_pipe(pipe_slow);
%}
14431
// Float compare of a register with a float constant, producing flags in
// rFlagsRegUCF. The constant is read from the constant table by ucomiss.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14442
// Float compare of a register with a float constant, producing flags in
// rFlagsRegUCFE. The constant is read from the constant table by evucomxss.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14453
// Double compare of two registers producing flags in rFlagsRegU.
// Costly (ins_cost 500): ucomisd is followed by the emit_cmpfp_fixup sequence,
// which the format string shows as a pushfq/andq/popfq patch for the NaN case.
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq [rsp], #0xffffff2b\n\t"
            "popfq\n"
            "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
14472
// Double compare of two registers producing flags in rFlagsRegUCF:
// a bare ucomisd with no fixup sequence.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The listing text names exactly the emitted instruction; the stray
  // trailing " test" word was dropped to match the sibling compare rules.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14483
// Double compare of two registers producing flags in rFlagsRegUCFE,
// emitted as evucomxsd.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // The listing text names exactly the emitted instruction; the stray
  // trailing " test" word was dropped to match the sibling compare rules.
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14494
// Double compare of a register with a double loaded from memory, producing
// flags in rFlagsRegUCF: ucomisd with a memory operand.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ ucomisd(lhs, rhs_addr);
  %}
  ins_pipe(pipe_slow);
%}
14505
// Double compare of a register with a double loaded from memory, producing
// flags in rFlagsRegUCFE: evucomxsd with a memory operand.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs      = $src1$$XMMRegister;
    Address     rhs_addr = $src2$$Address;
    __ evucomxsd(lhs, rhs_addr);
  %}
  ins_pipe(pipe_slow);
%}
14516
// Double compare of a register with a double constant, producing flags in
// rFlagsRegUCF. The constant is read from the constant table by ucomisd.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14526
// Double compare of a register with a double constant, producing flags in
// rFlagsRegUCFE. The constant is read from the constant table by evucomxsd.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
14537
// Three-way float compare (CmpF3) of two XMM registers; $dst ends up holding
// -1, 0 or 1. As the format text describes, -1 is preloaded and kept when the
// compare is unordered (parity) or below; otherwise setne/movzbl yield 0 or 1.
// The sequence after ucomiss is produced by emit_cmpfp3; flags are killed.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    const Register result_reg = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result_reg);
  %}
  ins_pipe(pipe_slow);
%}
14558
// Three-way float compare (CmpF3) of an XMM register against a float loaded
// from memory; $dst ends up holding -1, 0 or 1. The load is folded into the
// ucomiss memory operand and emit_cmpfp3 materializes the result. Flags are
// killed.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  format %{ "ucomiss $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    const Register result_reg = $dst$$Register;
    const Address  rhs_mem    = $src2$$Address;
    __ ucomiss($src1$$XMMRegister, rhs_mem);
    emit_cmpfp3(masm, result_reg);
  %}
  ins_pipe(pipe_slow);
%}
14579
// Three-way float compare (CmpF3) of an XMM register against a float constant
// read from the constant table; $dst ends up holding -1, 0 or 1 via
// emit_cmpfp3. Flags are killed.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    const Register result_reg = $dst$$Register;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result_reg);
  %}
  ins_pipe(pipe_slow);
%}
14599
// Three-way double compare (CmpD3) of two XMM registers; $dst ends up holding
// -1, 0 or 1. As the format text describes, -1 is preloaded and kept when the
// compare is unordered (parity) or below; otherwise setne/movzbl yield 0 or 1.
// The sequence after ucomisd is produced by emit_cmpfp3; flags are killed.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    const Register result_reg = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result_reg);
  %}
  ins_pipe(pipe_slow);
%}
14620
// Three-way double compare (CmpD3) of an XMM register against a double loaded
// from memory; $dst ends up holding -1, 0 or 1. The load is folded into the
// ucomisd memory operand and emit_cmpfp3 materializes the result. Flags are
// killed.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  format %{ "ucomisd $src1, $src2\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    const Register result_reg = $dst$$Register;
    const Address  rhs_mem    = $src2$$Address;
    __ ucomisd($src1$$XMMRegister, rhs_mem);
    emit_cmpfp3(masm, result_reg);
  %}
  ins_pipe(pipe_slow);
%}
14641
// Three-way double compare (CmpD3) of an XMM register against a double
// constant read from the constant table; $dst ends up holding -1, 0 or 1 via
// emit_cmpfp3. Flags are killed.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl $dst, #-1\n\t"
            "jp,s done\n\t"
            "jb,s done\n\t"
            "setne $dst\n\t"
            "movzbl $dst, $dst\n"
            "done:" %}
  ins_cost(275);
  ins_encode %{
    const Register result_reg = $dst$$Register;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result_reg);
  %}
  ins_pipe(pipe_slow);
%}
14661
14662 //----------Arithmetic Conversion Instructions---------------------------------
14663
// ConvF2D, register to register: widens the float in $src to a double in $dst
// with a single cvtss2sd.
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ cvtss2sd(dst_xmm, src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14674
// ConvF2D with the float operand loaded from memory. The load is folded into
// the cvtss2sd memory operand. Only selectable when UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $src$$Address;
    __ cvtss2sd(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14686
// ConvD2F, register to register: narrows the double in $src to a float in
// $dst with a single cvtsd2ss.
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ cvtsd2ss(dst_xmm, src_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14697
// ConvD2F with the double operand loaded from memory. The load is folded into
// the cvtsd2ss memory operand. Only selectable when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $src$$Address;
    __ cvtsd2ss(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14709
// XXX do mem variants
// ConvF2I for targets without AVX10.2 (see predicate). The conversion is
// delegated to the convertF2I helper with T_INT/T_FLOAT; the flags register
// is declared killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);

  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14722
// ConvF2I when VM_Version::supports_avx10_2(): one evcvttss2sisl from an XMM
// register into a 32-bit general register. No flags operand is declared.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttss2sisl(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14733
// ConvF2I of a float loaded from memory when AVX10.2 is supported. The LoadF
// is folded into the evcvttss2sisl memory operand.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $src$$Address;
    __ evcvttss2sisl(dst_reg, src_mem);
  %}
  ins_pipe(pipe_slow);
%}
14744
// ConvF2L for targets without AVX10.2 (see predicate). The conversion is
// delegated to the convertF2I helper with T_LONG/T_FLOAT; the flags register
// is declared killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "convert_f2l $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14756
// ConvF2L when VM_Version::supports_avx10_2(): one evcvttss2sisq from an XMM
// register into a 64-bit general register. No flags operand is declared.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttss2sisq(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14767
// ConvF2L of a float loaded from memory when AVX10.2 is supported. The LoadF
// is folded into the evcvttss2sisq memory operand.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $src$$Address;
    __ evcvttss2sisq(dst_reg, src_mem);
  %}
  ins_pipe(pipe_slow);
%}
14778
// ConvD2I for targets without AVX10.2 (see predicate). The conversion is
// delegated to the convertF2I helper with T_INT/T_DOUBLE; the flags register
// is declared killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);

  format %{ "convert_d2i $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14790
// ConvD2I when VM_Version::supports_avx10_2(): one evcvttsd2sisl from an XMM
// register into a 32-bit general register. No flags operand is declared.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttsd2sisl(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14801
// ConvD2I of a double loaded from memory when AVX10.2 is supported. The LoadD
// is folded into the evcvttsd2sisl memory operand.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $src$$Address;
    __ evcvttsd2sisl(dst_reg, src_mem);
  %}
  ins_pipe(pipe_slow);
%}
14812
// ConvD2L for targets without AVX10.2 (see predicate). The conversion is
// delegated to the convertF2I helper with T_LONG/T_DOUBLE; the flags register
// is declared killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "convert_d2l $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14824
// ConvD2L when VM_Version::supports_avx10_2(): one evcvttsd2sisq from an XMM
// register into a 64-bit general register. No flags operand is declared.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttsd2sisq(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14835
// ConvD2L of a double loaded from memory when AVX10.2 is supported. The LoadD
// is folded into the evcvttsd2sisq memory operand.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Address  src_mem = $src$$Address;
    __ evcvttsd2sisq(dst_reg, src_mem);
  %}
  ins_pipe(pipe_slow);
%}
14846
// RoundD: rounds the double in $src into the long register $dst through the
// round_double helper. $dst, $rtmp and $rcx are declared TEMP and the flags
// register is killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    const Register    dst_reg  = $dst$$Register;
    const XMMRegister src_xmm  = $src$$XMMRegister;
    const Register    tmp_reg  = $rtmp$$Register;
    const Register    rcx_reg  = $rcx$$Register;
    __ round_double(dst_reg, src_xmm, tmp_reg, rcx_reg);
  %}
  ins_pipe(pipe_slow);
%}
14857
// RoundF: rounds the float in $src into the int register $dst through the
// round_float helper. $dst, $rtmp and $rcx are declared TEMP and the flags
// register is killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  // List the temporaries in the debug text, as round_double_reg does, so the
  // printed assembly shows which registers this node clobbers.
  format %{ "round_float $dst,$src \t! using $rtmp and $rcx as TEMP" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}
14868
// ConvI2F from a general register, used when UseXmmI2F is off. With AVX
// enabled the destination is cleared with pxor before cvtsi2ssl.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on dst's previous
      // contents before the partial-register write -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2ssl(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14883
// ConvI2F of an int loaded from memory. The LoadI is folded into the
// cvtsi2ssl memory operand. Only selectable when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $src$$Address;
    __ cvtsi2ssl(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14895
// ConvI2D from a general register, used when UseXmmI2D is off. With AVX
// enabled the destination is cleared with pxor before cvtsi2sdl.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on dst's previous
      // contents before the partial-register write -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2sdl(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14910
// ConvI2D of an int loaded from memory. The LoadI is folded into the
// cvtsi2sdl memory operand. Only selectable when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $src$$Address;
    __ cvtsi2sdl(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14922
// ConvI2F variant selected when UseXmmI2F is on: the int is first moved into
// the XMM destination (movdl) and then converted in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2ps(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14936
// ConvI2D variant selected when UseXmmI2D is on: the int is first moved into
// the XMM destination (movdl) and then converted in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2pd(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14950
// ConvL2F from a 64-bit general register. With AVX enabled the destination is
// cleared with pxor before cvtsi2ssq.
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on dst's previous
      // contents before the partial-register write -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2ssq(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14964
// ConvL2F of a long loaded from memory. The LoadL is folded into the
// cvtsi2ssq memory operand. Only selectable when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $src$$Address;
    __ cvtsi2ssq(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14976
// ConvL2D from a 64-bit general register. With AVX enabled the destination is
// cleared with pxor before cvtsi2sdq.
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on dst's previous
      // contents before the partial-register write -- confirm.
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2sdq(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14990
// ConvL2D of a long loaded from memory. The LoadL is folded into the
// cvtsi2sdq memory operand. Only selectable when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Address     src_mem = $src$$Address;
    __ cvtsi2sdq(dst_xmm, src_mem);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15002
// ConvI2L, register to register, emitted as a single movslq (sign-extending
// 32-to-64-bit move).
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  format %{ "movslq $dst, $src\t# i2l" %}
  ins_cost(125);
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ movslq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
15014
// Zero-extending int-to-long conversion: matches (ConvI2L src) masked with a
// 32-bit all-ones long constant and implements it with a 32-bit movl.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // One-line format: no trailing "\n\t", which would print an empty
  // continuation line in the debug listing.
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // No instruction is emitted when the allocator assigned the same register
    // to dst and src.
    // NOTE(review): presumably safe because the int value already has zero
    // upper bits in that case -- confirm.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
15028
// Zero-extending int-to-long conversion of an int loaded from memory: matches
// (ConvI2L (LoadI src)) masked with a 32-bit all-ones long constant and
// implements it with a 32-bit load into $dst.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // One-line format: no trailing "\n\t", which would print an empty
  // continuation line in the debug listing.
  format %{ "movl $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
15040
// AndL of a long with the 32-bit all-ones mask, implemented as a 32-bit
// register move (a 32-bit write zero-extends into the full 64-bit register).
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl $dst, $src\t# zero-extend long" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ movl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
15051
// ConvL2I, register to register: a 32-bit movl copies the low half of the
// long into the int destination.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl $dst, $src\t# l2i" %}
  ins_encode %{
    const Register dst_reg = $dst$$Register;
    const Register src_reg = $src$$Register;
    __ movl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
15062
15063
// MoveF2I where the float lives in a stack slot: the slot's 32 bits are
// loaded unchanged into an int register with movl from [rsp + disp].
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
  ins_cost(125);
  ins_encode %{
    const Address slot_addr(rsp, $src$$disp);
    __ movl($dst$$Register, slot_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
15075
// MoveI2F where the int lives in a stack slot: the slot is loaded into an XMM
// register through movflt from [rsp + disp].
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
  ins_cost(125);
  ins_encode %{
    const Address slot_addr(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot_addr);
  %}
  ins_pipe(pipe_slow);
%}
15087
// MoveD2L where the double lives in a stack slot: the slot's 64 bits are
// loaded unchanged into a long register with movq from [rsp + disp].
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
  ins_cost(125);
  ins_encode %{
    const Address slot_addr(rsp, $src$$disp);
    __ movq($dst$$Register, slot_addr);
  %}
  ins_pipe(ialu_reg_mem);
%}
15099
// MoveL2D from a stack slot, selected when UseXmmLoadAndClearUpper is off.
// The format prints movlpd; the code calls the movdbl helper.
// NOTE(review): presumably movdbl picks the instruction from the same flag --
// confirm against its definition.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_cost(125);
  ins_encode %{
    const Address slot_addr(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot_addr);
  %}
  ins_pipe(pipe_slow);
%}
15112
// MoveL2D from a stack slot, selected when UseXmmLoadAndClearUpper is on.
// The format prints movsd; the code calls the movdbl helper.
// NOTE(review): presumably movdbl picks the instruction from the same flag --
// confirm against its definition.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
  ins_cost(125);
  ins_encode %{
    const Address slot_addr(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot_addr);
  %}
  ins_pipe(pipe_slow);
%}
15125
15126
// MoveF2I with the result in a stack slot: the float register is stored to
// [rsp + disp] through movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
  ins_cost(95); // XXX
  ins_encode %{
    const Address slot_addr(rsp, $dst$$disp);
    __ movflt(slot_addr, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15138
// MoveI2F with the result in a stack slot: the int register is stored to
// [rsp + disp] with movl.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
  ins_cost(100);
  ins_encode %{
    const Address slot_addr(rsp, $dst$$disp);
    __ movl(slot_addr, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
15150
// MoveD2L with the result in a stack slot: the double register is stored to
// [rsp + disp] through movdbl.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing tag names this rule (MoveD2L), so printed assembly is not
  // confused with the separate MoveL2D_reg_stack rule.
  format %{ "movsd $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15162
// MoveL2D with the result in a stack slot: the long register is stored to
// [rsp + disp] with movq.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
  ins_cost(100);
  ins_encode %{
    const Address slot_addr(rsp, $dst$$disp);
    __ movq(slot_addr, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15174
// MoveF2I, register to register: movdl copies the float's bits from the XMM
// register into the int register.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  format %{ "movd $dst,$src\t# MoveF2I" %}
  ins_cost(85);
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ movdl(dst_reg, src_xmm);
  %}
  ins_pipe( pipe_slow );
%}
15185
// MoveD2L, register to register: movdq copies the double's bits from the XMM
// register into the long register.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  format %{ "movd $dst,$src\t# MoveD2L" %}
  ins_cost(85);
  ins_encode %{
    const Register    dst_reg = $dst$$Register;
    const XMMRegister src_xmm = $src$$XMMRegister;
    __ movdq(dst_reg, src_xmm);
  %}
  ins_pipe( pipe_slow );
%}
15196
// MoveI2F, register to register: movdl copies the int's bits from the general
// register into the XMM register.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  format %{ "movd $dst,$src\t# MoveI2F" %}
  ins_cost(100);
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Register    src_reg = $src$$Register;
    __ movdl(dst_xmm, src_reg);
  %}
  ins_pipe( pipe_slow );
%}
15207
// MoveL2D, register to register: movdq copies the long's bits from the
// general register into the XMM register.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  format %{ "movd $dst,$src\t# MoveL2D" %}
  ins_cost(100);
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const Register    src_reg = $src$$Register;
    __ movdq(dst_xmm, src_reg);
  %}
  ins_pipe( pipe_slow );
%}
15218
15219 // Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray rule for nodes that are not is_large() when UseAVX <= 2.
// Operands are pinned: count in rcx, base in rdi, zero in rax; cnt and base
// are consumed (USE_KILL), an XMM temp is reserved, and flags are killed.
// The format template prints the strategy chosen by UseFastStosb /
// UseXMMForObjInit; the code itself comes from the clear_mem helper.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // 'false' pairs with the !is_large() predicate; no opmask register is
    // supplied (knoreg) in this non-EVEX variant.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15279
// ClearArray rule for nodes that are not is_large() when UseAVX > 2.
// Same register pinning as the non-EVEX rule (rcx count, rdi base, rax zero)
// plus an opmask temp ($ktmp) that is handed to clear_mem. cnt and base are
// consumed, the XMM temp is a legRegD, and flags are killed.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  ins_cost(125);

  format %{ $$template
    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
    $$emit$$"jg LARGE\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"js DONE\t# Zero length\n\t"
    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec rcx\n\t"
    $$emit$$"jge LOOP\n\t"
    $$emit$$"jmp DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // 'false' pairs with the !is_large() predicate.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15340
// ClearArray rule for is_large() nodes when UseAVX <= 2. Register pinning and
// effects match the small variant (rcx count, rdi base, rax zero, XMM temp,
// flags killed); the format template omits the short-array prologue and
// clear_mem is called with 'true'.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // 'true' pairs with the is_large() predicate; no opmask register is
    // supplied (knoreg) in this non-EVEX variant.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15391
// ClearArray rule for is_large() nodes when UseAVX > 2. Same pinning as the
// other ClearArray rules (rcx count, rdi base, rax zero) with a legRegD XMM
// temp and an opmask temp that is forwarded to clear_mem; flags are killed.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add 0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub 0x8,rcx\n\t"
       $$emit$$"jge L_loop\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jl L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add 0x20,rax\n\t"
       $$emit$$"sub 0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add 0x4,rcx\n\t"
       $$emit$$"jle L_end\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq xmm0,(rax)\n\t"
       $$emit$$"add 0x8,rax\n\t"
       $$emit$$"dec rcx\n\t"
       $$emit$$"jge L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    // 'true' pairs with the is_large() predicate.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, mask_tmp);
  %}
  ins_pipe(pipe_slow);
%}
15442
// Small constant length ClearArray for AVX512 targets.
// Matches a ClearArray whose count is a long immediate, for nodes that are
// not is_large(), when MaxVectorSize >= 32 and AVX512VL is supported. The
// base may be any pointer register; an XMM temp, an int temp ($zero) and an
// opmask temp are reserved and flags are killed. The code comes from the
// constant-count overload of clear_mem.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  // One-line format: no trailing "\n\t", which would print an empty
  // continuation line in the debug listing.
  format %{ "clear_mem_imm $base, $cnt" %}
  ins_encode %{
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15456
// StrComp for the LL encoding when AVX512VL+BW is not available. Operands are
// pinned (str1 rdi, cnt1 rcx, str2 rsi, cnt2 rdx, result rax); all four
// inputs are consumed, one XMM temp is reserved and flags are killed. The
// work is done by the string_compare helper with no opmask register.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg  = $str1$$Register;
    const Register    s2_reg  = $str2$$Register;
    const Register    n1_reg  = $cnt1$$Register;
    const Register    n2_reg  = $cnt2$$Register;
    const Register    res_reg = $result$$Register;
    const XMMRegister xmm_tmp = $tmp1$$XMMRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15472
// StrComp for the LL encoding when AVX512VL+BW is available. Same register
// pinning as the non-EVEX rule (str1 rdi, cnt1 rcx, str2 rsi, cnt2 rdx,
// result rax) plus an opmask temp that is forwarded to string_compare.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg   = $str1$$Register;
    const Register    s2_reg   = $str2$$Register;
    const Register    n1_reg   = $cnt1$$Register;
    const Register    n2_reg   = $cnt2$$Register;
    const Register    res_reg  = $result$$Register;
    const XMMRegister xmm_tmp  = $tmp1$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::LL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15488
// StrComp for the UU encoding when AVX512VL+BW is not available. Operands are
// pinned (str1 rdi, cnt1 rcx, str2 rsi, cnt2 rdx, result rax); all four
// inputs are consumed, one XMM temp is reserved and flags are killed. The
// work is done by the string_compare helper with no opmask register.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg  = $str1$$Register;
    const Register    s2_reg  = $str2$$Register;
    const Register    n1_reg  = $cnt1$$Register;
    const Register    n2_reg  = $cnt2$$Register;
    const Register    res_reg = $result$$Register;
    const XMMRegister xmm_tmp = $tmp1$$XMMRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15504
// StrComp for the UU encoding when AVX512VL+BW is available. Same register
// pinning as the non-EVEX rule (str1 rdi, cnt1 rcx, str2 rsi, cnt2 rdx,
// result rax) plus an opmask temp that is forwarded to string_compare.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg   = $str1$$Register;
    const Register    s2_reg   = $str2$$Register;
    const Register    n1_reg   = $cnt1$$Register;
    const Register    n2_reg   = $cnt2$$Register;
    const Register    res_reg  = $result$$Register;
    const XMMRegister xmm_tmp  = $tmp1$$XMMRegister;
    const KRegister   mask_tmp = $ktmp$$KRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::UU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15520
// StrComp for the LU encoding when AVX512VL+BW is not available. Operands are
// pinned (str1 rdi, cnt1 rcx, str2 rsi, cnt2 rdx, result rax); all four
// inputs are consumed, one XMM temp is reserved and flags are killed. The
// work is done by the string_compare helper with no opmask register.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    const Register    s1_reg  = $str1$$Register;
    const Register    s2_reg  = $str2$$Register;
    const Register    n1_reg  = $cnt1$$Register;
    const Register    n2_reg  = $cnt2$$Register;
    const Register    res_reg = $result$$Register;
    const XMMRegister xmm_tmp = $tmp1$$XMMRegister;
    __ string_compare(s1_reg, s2_reg, n1_reg, n2_reg, res_reg,
                      xmm_tmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15536
// StrComp with LU encoding, selected when VM_Version::supports_avx512vlbw()
// is true. An opmask register (ktmp) is reserved as TEMP and handed to the
// macro assembler.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1,
                               rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  // Both pointers and both counts are USE_KILL; tmp1 and ktmp are scratch.
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    first_ptr  = $str1$$Register;
    Register    second_ptr = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    __ string_compare(first_ptr, second_ptr, first_len, second_len, outcome,
                      vec_tmp, StrIntrinsicNode::LU, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15552
// StrComp with UL encoding, selected when VM_Version::supports_avx512vlbw()
// is false. Compared with the UU/LU rules the operand registers are swapped
// (str1/cnt1 in RSI/RDX, str2/cnt2 in RDI/RCX), and the macro assembler is
// called with str2/cnt2 ahead of str1/cnt1.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1,
                          rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  // Both pointers and both counts are USE_KILL; tmp1 is vector scratch.
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    first_ptr  = $str1$$Register;
    Register    second_ptr = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    // Argument order is deliberately (str2, str1, cnt2, cnt1).
    __ string_compare(second_ptr, first_ptr, second_len, first_len, outcome,
                      vec_tmp, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15568
// StrComp with UL encoding, selected when VM_Version::supports_avx512vlbw()
// is true. Operand registers are swapped relative to the UU/LU rules and the
// macro assembler is called with str2/cnt2 ahead of str1/cnt1; ktmp is an
// opmask scratch register.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1,
                               rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  // Both pointers and both counts are USE_KILL; tmp1 and ktmp are scratch.
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    Register    first_ptr  = $str1$$Register;
    Register    second_ptr = $str2$$Register;
    Register    first_len  = $cnt1$$Register;
    Register    second_len = $cnt2$$Register;
    Register    outcome    = $result$$Register;
    XMMRegister vec_tmp    = $tmp1$$XMMRegister;
    KRegister   mask_tmp   = $ktmp$$KRegister;
    // Argument order is deliberately (str2, str1, cnt2, cnt1).
    __ string_compare(second_ptr, first_ptr, second_len, first_len, outcome,
                      vec_tmp, StrIntrinsicNode::UL, mask_tmp);
  %}
  ins_pipe( pipe_slow );
%}
15584
// StrIndexOf, LL encoding, where the substring length is a compile-time
// constant (immI int_cnt2). Requires UseSSE42Intrinsics. The constant picks
// between string_indexofC8 (>= 16 elements) and string_indexof (shorter).
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec,
                             rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  // cnt2 is not a match input here; it is only a clobbered register (KILL).
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15613
// StrIndexOf, UU encoding, where the substring length is a compile-time
// constant (immI int_cnt2). Requires UseSSE42Intrinsics. The constant picks
// between string_indexofC8 (>= 8 elements) and string_indexof (shorter).
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec,
                             rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  // cnt2 is not a match input here; it is only a clobbered register (KILL).
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
15642
// StrIndexOf, UL encoding, where the substring length is a compile-time
// constant (immI int_cnt2). Requires UseSSE42Intrinsics. The constant picks
// between string_indexofC8 (>= 8 elements) and string_indexof (shorter).
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec,
                              rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  // cnt2 is not a match input here; it is only a clobbered register (KILL).
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
15671
// StrIndexOf, LL encoding, with the substring length held in a register
// (cnt2). Requires UseSSE42Intrinsics. The constant-count argument of
// string_indexof is passed as -1.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1,
                         rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
15688
// StrIndexOf, UU encoding, with the substring length held in a register
// (cnt2). Requires UseSSE42Intrinsics. The constant-count argument of
// string_indexof is passed as -1.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1,
                         rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
15705
// StrIndexOf, UL encoding, with the substring length held in a register
// (cnt2). Requires UseSSE42Intrinsics. The constant-count argument of
// string_indexof is passed as -1.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1,
                          rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
15722
// StrIndexOfChar with encoding U. Requires UseSSE42Intrinsics. Three vector
// temporaries and one general-purpose temporary are reserved for
// MacroAssembler::string_indexof_char.
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result,
                             legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                             rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register,
                           $cnt1$$Register,
                           $ch$$Register,
                           $result$$Register,
                           $tmp_vec1$$XMMRegister,
                           $tmp_vec2$$XMMRegister,
                           $tmp_vec3$$XMMRegister,
                           $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15736
// StrIndexOfChar with encoding L. Requires UseSSE42Intrinsics. Three vector
// temporaries and one general-purpose temporary are reserved for
// MacroAssembler::stringL_indexof_char.
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result,
                              legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                              rcx_RegI tmp, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register,
                            $cnt1$$Register,
                            $ch$$Register,
                            $result$$Register,
                            $tmp_vec1$$XMMRegister,
                            $tmp_vec2$$XMMRegister,
                            $tmp_vec3$$XMMRegister,
                            $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15750
// StrEquals, selected when VM_Version::supports_avx512vlbw() is false.
// Delegates to arrays_equals with its first argument false and an explicit
// count register; knoreg stands in for the opmask register.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  predicate(!VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register lhs_ptr = $str1$$Register;
    Register rhs_ptr = $str2$$Register;
    Register length  = $cnt$$Register;
    __ arrays_equals(false, lhs_ptr, rhs_ptr,
                     length, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15767
// StrEquals, selected when VM_Version::supports_avx512vlbw() is true.
// Delegates to arrays_equals with its first argument false and an explicit
// count register; ktmp is the opmask scratch register.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rbx_RegI tmp3, rFlagsReg cr)
%{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  predicate(VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    Register lhs_ptr = $str1$$Register;
    Register rhs_ptr = $str2$$Register;
    Register length  = $cnt$$Register;
    __ arrays_equals(false, lhs_ptr, rhs_ptr,
                     length, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15783
// AryEq with LL encoding (byte[] per the format string), selected when
// VM_Version::supports_avx512vlbw() is false. arrays_equals is called with
// its first argument true, the char flag false, and knoreg as opmask.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2,
                       rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register first_ary  = $ary1$$Register;
    Register second_ary = $ary2$$Register;
    // tmp3 occupies the argument slot that string_equals fills with cnt.
    __ arrays_equals(true, first_ary, second_ary,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15800
// AryEq with LL encoding (byte[] per the format string), selected when
// VM_Version::supports_avx512vlbw() is true. arrays_equals is called with
// its first argument true, the char flag false, and ktmp as opmask scratch.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register first_ary  = $ary1$$Register;
    Register second_ary = $ary2$$Register;
    // tmp3 occupies the argument slot that string_equals fills with cnt.
    __ arrays_equals(true, first_ary, second_ary,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15816
// AryEq with UU encoding (char[] per the format string), selected when
// VM_Version::supports_avx512vlbw() is false. arrays_equals is called with
// its first argument true, the char flag true, and knoreg as opmask.
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2,
                       rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register first_ary  = $ary1$$Register;
    Register second_ary = $ary2$$Register;
    // tmp3 occupies the argument slot that string_equals fills with cnt.
    __ arrays_equals(true, first_ary, second_ary,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15832
// AryEq with UU encoding (char[] per the format string), selected when
// VM_Version::supports_avx512vlbw() is true. arrays_equals is called with
// its first argument true, the char flag true, and ktmp as opmask scratch.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    Register first_ary  = $ary1$$Register;
    Register second_ary = $ary2$$Register;
    // tmp3 occupies the argument slot that string_equals fills with cnt.
    __ arrays_equals(true, first_ary, second_ary,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15848
// VectorizedHashCode, available when UseAVX >= 2. `result` appears both as
// an input of the match tree and as the destination. `basic_type` is an
// immediate that is cast to BasicType for the macro assembler. Thirteen
// vector temporaries and three general-purpose temporaries are reserved.
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1,  legRegD tmp_vec2,  legRegD tmp_vec3,
                         legRegD tmp_vec4,  legRegD tmp_vec5,  legRegD tmp_vec6,
                         legRegD tmp_vec7,  legRegD tmp_vec8,  legRegD tmp_vec9,
                         legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13,
                         rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
%{
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  predicate(UseAVX >= 2);
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
         TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result // KILL all" %}
  ins_encode %{
    // Argument order: array, count, result, GPR temps, vector temps, type.
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister,  $tmp_vec2$$XMMRegister,
                       $tmp_vec3$$XMMRegister,  $tmp_vec4$$XMMRegister,
                       $tmp_vec5$$XMMRegister,  $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister,  $tmp_vec8$$XMMRegister,
                       $tmp_vec9$$XMMRegister,  $tmp_vec10$$XMMRegister,
                       $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister,
                       (BasicType)$basic_type$$constant);
  %}
  ins_pipe( pipe_slow );
%}
15874
// CountPositives on a byte[] (per the format string), selected when either
// VM_Version::supports_avx512vlbw() or VM_Version::supports_bmi2() is false.
// Both opmask arguments of MacroAssembler::count_positives are knoreg.
//
// The operand list previously ended in a stray trailing comma
// ("rFlagsReg cr,)"); it is removed so the declaration is well-formed and
// consistent with the other instruct definitions.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  // ary1 and len are read and then clobbered; tmp3 and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15890
// CountPositives on a byte[] (per the format string), selected when both
// VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() are
// true. Two opmask scratch registers (ktmp1, ktmp2) are reserved and passed
// to MacroAssembler::count_positives.
//
// The operand list previously ended in a stray trailing comma
// ("rFlagsReg cr,)"); it is removed so the declaration is well-formed and
// consistent with the other instruct definitions.
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2,
                              rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  // ary1 and len are read and then clobbered; tmp3 and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15906
// StrCompressedCopy (char[] -> byte[] compression), selected when either
// VM_Version::supports_avx512vlbw() or VM_Version::supports_bmi2() is false.
// Both opmask arguments of char_array_compress are knoreg.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                         legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                         rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register from  = $src$$Register;
    Register to    = $dst$$Register;
    Register count = $len$$Register;
    __ char_array_compress(from, to, count,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15924
// StrCompressedCopy, selected when both VM_Version::supports_avx512vlbw()
// and VM_Version::supports_bmi2() are true. Two opmask scratch registers
// (ktmp1, ktmp2) are reserved and passed to char_array_compress.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                              legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                              kReg ktmp1, kReg ktmp2,
                              rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    Register from  = $src$$Register;
    Register to    = $dst$$Register;
    Register count = $len$$Register;
    __ char_array_compress(from, to, count,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
// StrInflatedCopy (byte[] -> char[] inflation), selected when either
// VM_Version::supports_avx512vlbw() or VM_Version::supports_bmi2() is false.
// The rule sets a Universe dummy operand; knoreg is passed as opmask.
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register from  = $src$$Register;
    Register to    = $dst$$Register;
    Register count = $len$$Register;
    __ byte_array_inflate(from, to, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15955
// StrInflatedCopy, selected when both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() are true. The rule sets a Universe dummy
// operand; ktmp is the opmask scratch register handed to byte_array_inflate.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    Register from  = $src$$Register;
    Register to    = $dst$$Register;
    Register count = $len$$Register;
    __ byte_array_inflate(from, to, count,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15969
// EncodeISOArray for nodes whose is_ascii() is false (ISO_8859_1 encoding of
// char[] into byte[]). The final argument of encode_iso_array is false.
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register from  = $src$$Register;
    Register to    = $dst$$Register;
    Register count = $len$$Register;
    __ encode_iso_array(from, to, count,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
15986
// EncodeISOArray for nodes whose is_ascii() is true (ASCII encoding of
// char[] into byte[]). Uses the same macro-assembler routine as
// encode_iso_array, with the final argument true.
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    Register from  = $src$$Register;
    Register to    = $dst$$Register;
    Register count = $len$$Register;
    __ encode_iso_array(from, to, count,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
16003
16004 //----------Overflow Math Instructions-----------------------------------------
16005
// OverflowAddI, register + register. Emits a real addl, so op1 (pinned to
// RAX) is overwritten; only the resulting flags are the rule's output.
instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register accum  = $op1$$Register;
    Register addend = $op2$$Register;
    __ addl(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16018
// OverflowAddI, register + int immediate. Emits a real addl, so op1 (pinned
// to RAX) is overwritten; only the resulting flags are the rule's output.
instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl $op1, $op2\t# overflow check int" %}

  ins_encode %{
    Register accum = $op1$$Register;
    __ addl(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16031
// OverflowAddL, register + register. Emits a real addq, so op1 (pinned to
// RAX) is overwritten; only the resulting flags are the rule's output.
instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register accum  = $op1$$Register;
    Register addend = $op2$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16043
// OverflowAddL, register + immL32 immediate. Emits a real addq, so op1
// (pinned to RAX) is overwritten; only the flags are the rule's output.
instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register accum = $op1$$Register;
    __ addq(accum, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16055
// OverflowSubI, register - register. Implemented with cmpl, which sets the
// flags without writing either operand, so no effect() clause is declared.
instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpl(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16066
// OverflowSubI, register - int immediate. Implemented with cmpl, which sets
// the flags without writing the register operand.
instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpl(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16077
// OverflowSubL, register - register. Implemented with cmpq, which sets the
// flags without writing either operand, so no effect() clause is declared.
instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend    = $op1$$Register;
    Register subtrahend = $op2$$Register;
    __ cmpq(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
16088
// OverflowSubL, register - immL32 immediate. Implemented with cmpq, which
// sets the flags without writing the register operand.
instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register minuend = $op1$$Register;
    __ cmpq(minuend, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
16099
// OverflowSubI with a constant-zero left operand (0 - op2), i.e. an int
// negation overflow check. Emits negl, which overwrites op2 (pinned to RAX).
instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl $op2\t# overflow check int" %}
  ins_encode %{
    Register negated = $op2$$Register;
    __ negl(negated);
  %}
  ins_pipe(ialu_reg_reg);
%}
16111
// OverflowSubL with a constant-zero left operand (0 - op2), i.e. a long
// negation overflow check. Emits negq, which overwrites op2 (pinned to RAX).
instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq $op2\t# overflow check long" %}
  ins_encode %{
    Register negated = $op2$$Register;
    __ negq(negated);
  %}
  ins_pipe(ialu_reg_reg);
%}
16123
// OverflowMulI, register * register. Emits the two-operand imull, so op1
// (pinned to RAX) is overwritten; only the flags are the rule's output.
instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register product    = $op1$$Register;
    Register multiplier = $op2$$Register;
    __ imull(product, multiplier);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16135
// OverflowMulI, register * int immediate. Uses the three-operand imull that
// writes into a scratch register (tmp), so op1 is only read (USE).
instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    Register scratch      = $tmp$$Register;
    Register multiplicand = $op1$$Register;
    __ imull(scratch, multiplicand, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16147
// OverflowMulL, register * register. Emits the two-operand imulq, so op1
// (pinned to RAX) is overwritten; only the flags are the rule's output.
instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register product    = $op1$$Register;
    Register multiplier = $op2$$Register;
    __ imulq(product, multiplier);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16159
// OverflowMulL, register * immL32 immediate. Uses the three-operand imulq
// that writes into a scratch register (tmp), so op1 is only read (USE).
instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    Register scratch      = $tmp$$Register;
    Register multiplicand = $op1$$Register;
    __ imulq(scratch, multiplicand, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}
16171
16172
16173 //----------Control Flow Instructions------------------------------------------
16174 // Signed compare Instructions
16175
16176 // XXX more variants!!
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  // Signed int compare, register against register; result goes to the flags.
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16188
// Signed int compare, register against int immediate.
instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16199
// Signed int compare, register against an int loaded from memory; the LoadI
// is folded into the cmpl memory operand.
instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16211
// Signed int compare against constant zero, emitted as testl of the
// register with itself.
instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl $src, $src" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, value);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16222
// (src & con) compared with zero, folded into a single testl of the
// register against the immediate mask.
instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl $src, $con" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16233
// (src1 & src2) compared with zero, folded into a single testl of the two
// registers.
instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src1 src2) zero));

  format %{ "testl $src1, $src2" %}
  ins_encode %{
    Register first  = $src1$$Register;
    Register second = $src2$$Register;
    __ testl(first, second);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16244
// (src & LoadI mem) compared with zero, folded into a single testl of the
// register against the memory operand.
instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl $src, $mem" %}
  ins_encode %{
    Register value = $src$$Register;
    __ testl(value, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16255
16256 // Unsigned compare Instructions; really, same as signed except they
16257 // produce an rFlagsRegU instead of rFlagsReg.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  // CmpU, register against register; the result is typed rFlagsRegU.
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    Register rhs = $op2$$Register;
    __ cmpl(lhs, rhs);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16268
// CmpU, register against int immediate; the result is typed rFlagsRegU.
instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16279
// CmpU, register against an int loaded from memory; the LoadI is folded
// into the cmpl memory operand. The result is typed rFlagsRegU.
instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl $op1, $op2\t# unsigned" %}
  ins_encode %{
    Register lhs = $op1$$Register;
    __ cmpl(lhs, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16291
// CmpU against the constant zero, done by testing the register against
// itself.
instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero) %{
  match(Set cr (CmpU src zero));

  format %{ "testl $src, $src\t# unsigned" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16302
// Pointer compare of two registers; the result is an rFlagsRegU.
instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
16313
// Pointer compare of a register against a pointer loaded from memory.
// Only applies when the load carries no barrier data.
instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{
  predicate(n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  ins_cost(500); // XXX
  format %{ "cmpq $op1, $op2\t# ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16326
// Raw pointer compare against memory (used in the out-of-heap check).
// This only works because non-oop pointers must be raw pointers, and raw
// pointers have no anti-dependencies.
// XXX is this generalized by compP_rReg_mem???
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{
  // The load's in(2) must have a raw-pointer bottom type, and the load itself
  // must carry no barrier data.
  predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq $op1, $op2\t# raw ptr" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16343
// Pointer compare against null. Note that the result is a signed-flags
// rFlagsReg; that should be fine because a compare against zero should only
// ever be consumed as eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) %{
  match(Set cr (CmpP src zero));

  format %{ "testq $src, $src\t# ptr" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16356
// Null check of a pointer loaded from memory. As with testP_reg the result
// is a signed-flags rFlagsReg, which should be fine because a compare against
// zero should only be consumed as eq/neq.
// Applies when compressed oops are off or their base is non-null, and the
// load carries no barrier data.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{
  match(Set cr (CmpP (LoadP op) zero));
  predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
            n->in(1)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "testq $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    // NOTE(review): the format shows a 64-bit all-ones mask while the call
    // passes 0xFFFFFFFF -- presumably a sign-extended 32-bit immediate; confirm.
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16372
// Null check of a pointer loaded from memory for the configuration where
// compressed oops are enabled with a null base: r12 is compared against the
// memory operand (the format string documents R12_heapbase==0).
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{
  match(Set cr (CmpP (LoadP mem) zero));
  predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpq(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16385
// Compare two compressed pointers held in registers (32-bit cmpl).
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}
16394
// Compare a compressed pointer register against a compressed pointer loaded
// from memory. Only applies when the load carries no barrier data.
instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{
  match(Set cr (CmpN src (LoadN mem)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpl $src, $mem\t# compressed ptr" %}
  ins_encode %{ __ cmpl($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16406
// Compare a compressed pointer register against a narrow-oop constant; the
// constant is handed to cmp_narrow_oop as a jobject.
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16416
// Compare a compressed pointer in memory against a narrow-oop constant.
// Only applies when the load carries no barrier data.
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{
  match(Set cr (CmpN src (LoadN mem)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  format %{ "cmpl $mem, $src\t# compressed ptr" %}
  ins_encode %{ __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16428
// Compare a compressed pointer register against a narrow-klass constant; the
// constant is handed to cmp_narrow_klass as a Klass*.
instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16438
// Compare a narrow klass loaded from memory against a narrow-klass constant.
// Disabled when UseCompactObjectHeaders is set.
instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{
  match(Set cr (CmpN src (LoadNKlass mem)));
  predicate(!UseCompactObjectHeaders);

  format %{ "cmpl $mem, $src\t# compressed klass ptr" %}
  ins_encode %{ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16450
// Compressed pointer compare against the narrow null constant, done by
// testing the register against itself.
instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero)
%{
  match(Set cr (CmpN src zero));

  format %{ "testl $src, $src\t# compressed ptr" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}
16458
// Null check of a compressed pointer loaded from memory, used when the
// compressed-oops base is non-null and the load carries no barrier data.
//
// The loaded 32-bit value is tested against an all-ones mask, so ZF is set
// exactly when the narrow oop in memory is zero. This is what the matched
// (CmpN (LoadN mem) zero) requires and what the format string advertises.
// The previous encoding used cmpl against 0xFFFFFFFF, which sets ZF when the
// memory word equals -1 rather than 0; it is now consistent with testP_mem,
// which uses testq with an all-ones mask.
instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ testl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16472
// Null check of a compressed pointer loaded from memory for the configuration
// where the compressed-oops base is null: r12 is compared against the memory
// operand (the format string documents R12_heapbase==0).
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{
  match(Set cr (CmpN (LoadN mem) zero));
  predicate(CompressedOops::base() == nullptr &&
            n->in(1)->as_Load()->barrier_data() == 0);

  format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{ __ cmpl(r12, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16485
16486 // Yanked all unsigned pointer compare operations.
16487 // Pointer compares are done with CmpP which is already unsigned.
16488
// Signed long compare of two registers.
instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
16499
// Signed long compare of a register against an immL32 immediate.
instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16510
// Signed long compare of a register against a long loaded from memory; the
// load is folded into the cmpq.
instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) %{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16521
// Long compare against the constant zero, done by testing the register
// against itself.
instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpL src zero));

  format %{ "testq $src, $src" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16532
// Folds (CmpL (AndL src con) 0) into a testq of the register against an
// immL32 immediate.
instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) %{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq $src, $con\t# long" %}
  ins_encode %{ __ testq($src$$Register, $con$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16543
// Folds (CmpL (AndL src1 src2) 0) into a register/register testq.
instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero) %{
  match(Set cr (CmpL (AndL src1 src2) zero));

  format %{ "testq $src1, $src2\t# long" %}
  ins_encode %{ __ testq($src1$$Register, $src2$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16554
// Folds (CmpL (AndL src (LoadL mem)) 0) into a testq whose second operand is
// read directly from memory.
instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16565
// Same as testL_reg_mem, but the register operand is a pointer that the ideal
// graph converts with CastP2X; the pointer register is used directly.
instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) %{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq $src, $mem" %}
  ins_encode %{ __ testq($src$$Register, $mem$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16576
// Manifest a CmpU3 result (three-way unsigned int compare) in an integer
// register. Very painful. This is the test to avoid.
//
// Sequence: compare, preload $dst with -1, and keep that value when src1 is
// below src2 (unsigned). Otherwise setcc(notZero) overwrites $dst with the
// "not equal" condition (per the format note: setne + movzbl, or setzune for
// APX). The flags are clobbered, hence KILL flags.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The disassembly text names the node actually matched (CmpU3) and puts the
  // "done:" label on its own line.
  format %{ "cmpl $src1, $src2\t# CmpU3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16600
// Manifest a CmpL3 result (three-way signed long compare) in an integer
// register. Very painful. This is the test to avoid.
//
// Sequence: compare, preload $dst with -1, and keep that value when src1 is
// less than src2 (signed). Otherwise setcc(notZero) overwrites $dst with the
// "not equal" condition (per the format note: setne + movzbl, or setzune for
// APX). The flags are clobbered, hence KILL flags.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The "done:" label is printed on its own line in the disassembly text.
  format %{ "cmpq $src1, $src2\t# CmpL3\n\t"
            "movl $dst, -1\n\t"
            "jl,s done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16624
// Manifest a CmpUL3 result (three-way unsigned long compare) in an integer
// register. Very painful. This is the test to avoid.
//
// Sequence: compare, preload $dst with -1, and keep that value when src1 is
// below src2 (unsigned). Otherwise setcc(notZero) overwrites $dst with the
// "not equal" condition (per the format note: setne + movzbl, or setzune for
// APX). The flags are clobbered, hence KILL flags.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  // The disassembly text names the node actually matched (CmpUL3) and puts
  // the "done:" label on its own line.
  format %{ "cmpq $src1, $src2\t# CmpUL3\n\t"
            "movl $dst, -1\n\t"
            "jb,u done\n\t"
            "setcc $dst \t# emits setne + movzbl or setzune for APX\n\t"
            "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setcc(Assembler::notZero, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
16648
// Unsigned long compare instructions. These mirror the signed long forms;
// the only difference is that they produce an rFlagsRegU instead of an
// rFlagsReg.

// CmpUL of two long registers.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}
16661
// CmpUL of a long register against an immL32 immediate.
instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) %{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$constant); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16672
// CmpUL of a long register against a long loaded from memory; the load is
// folded into the cmpq.
instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) %{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq $op1, $op2\t# unsigned" %}
  ins_encode %{ __ cmpq($op1$$Register, $op2$$Address); %}
  ins_pipe(ialu_cr_reg_mem);
%}
16683
// CmpUL against the constant zero, done by testing the register against
// itself.
instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) %{
  match(Set cr (CmpUL src zero));

  format %{ "testq $src, $src\t# unsigned" %}
  ins_encode %{ __ testq($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}
16694
// Compare a signed byte in memory against an immI8 immediate using a byte
// compare, so the LoadB is folded into the cmpb.
instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) %{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb $mem, $imm" %}
  ins_encode %{
    __ cmpb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16704
// Folds (CmpI (AndI (LoadUB mem) imm) 0) into a testb of the memory byte
// against an immU7 immediate.
instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# ubyte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16714
// Folds (CmpI (AndI (LoadB mem) imm) 0) into a testb of the memory byte
// against an immI8 immediate.
instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero) %{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb $mem, $imm\t# byte" %}
  ins_encode %{
    __ testb($mem$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}
16724
16725 //----------Max and Min--------------------------------------------------------
16726 // Min Instructions
16727
// Helper for minI_rReg (non-APX): it has no match rule of its own and is
// instantiated from an expand. Moves $src into $dst when the incoming flags
// say "greater".
instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{ __ cmovl(Assembler::greater, $dst$$Register, $src$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16739
// APX counterpart of cmovI_reg_g, used by minI_rReg_ndd: a three-operand
// ecmovl on the "greater" condition with a separate destination register.
instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
  ins_encode %{ __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16751
// MinI without APX: expands into a compare of dst with src followed by a
// conditional move of src into dst on "greater".
instruct minI_rReg(rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;                // flags produced by the compare, consumed by the cmov
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}
16764
// MinI with APX: expands into a compare of src1 with src2 followed by the
// three-operand conditional move on "greater". Tagged with
// Flag_ndd_demotable_opr1.
instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (MinI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;                // flags produced by the compare, consumed by the cmov
    compI_rReg(cr, src1, src2);
    cmovI_reg_g_ndd(dst, src1, src2, cr);
  %}
%}
16779
// Helper for maxI_rReg (non-APX): it has no match rule of its own and is
// instantiated from an expand. Moves $src into $dst when the incoming flags
// say "less".
instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{ __ cmovl(Assembler::less, $dst$$Register, $src$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16791
// APX counterpart of cmovI_reg_l, used by maxI_rReg_ndd: a three-operand
// ecmovl on the "less" condition with a separate destination register.
instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  effect(DEF dst, USE src1, USE src2, USE cr);

  format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
  ins_encode %{ __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register); %}
  ins_pipe(pipe_cmov_reg);
%}
16803
// MaxI without APX: expands into a compare of dst with src followed by a
// conditional move of src into dst on "less".
instruct maxI_rReg(rRegI dst, rRegI src) %{
  predicate(!UseAPX);
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;                // flags produced by the compare, consumed by the cmov
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}
16816
// MaxI with APX: expands into a compare of src1 with src2 followed by the
// three-operand conditional move on "less". Tagged with
// Flag_ndd_demotable_opr1.
instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2) %{
  predicate(UseAPX);
  match(Set dst (MaxI src1 src2));
  effect(DEF dst, USE src1, USE src2);
  flag(PD::Flag_ndd_demotable_opr1);

  ins_cost(200);
  expand %{
    rFlagsReg cr;                // flags produced by the compare, consumed by the cmov
    compI_rReg(cr, src1, src2);
    cmovI_reg_l_ndd(dst, src1, src2, cr);
  %}
%}
16831
16832 // ============================================================================
16833 // Branch Instructions
16834
// Jump Direct - Label defines a relative address from JMP+1.
// Unconditional branch for Goto, always emitted in its long form with a
// declared size of 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp $labl" %}
  size(5);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmp(*target, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}
16850
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Conditional branch for If on rFlagsReg, always emitted in its long form
// with a declared size of 6 bytes.
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16866
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Back-branch for CountedLoopEnd on rFlagsReg, always emitted in its long
// form with a declared size of 6 bytes.
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16882
// Jump Direct Conditional - using unsigned comparison.
// Same shape as jmpCon but consumes an rFlagsRegU through a cmpOpU; always
// emitted in its long form with a declared size of 6 bytes.
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16897
// Conditional branch for If on rFlagsRegUCF through a cmpOpUCF; always
// emitted in its long form with a declared size of 6 bytes.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16911
// Conditional branch for If on rFlagsRegUCF through a cmpOpUCF2. Only the
// equal / notEqual conditions are supported, and each is emitted as a pair of
// branches that also consults the parity flag:
//   notEqual: branch to the target on parity, then on notEqual.
//   equal:    skip over the branch on parity, otherwise branch on equal.
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u $labl\n\t"
      $$emit$$"j$cop,u $labl"
    } else {
      $$emit$$"jp,u done\n\t"
      $$emit$$"j$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jcc(Assembler::parity, *target, false);
        __ jcc(Assembler::notEqual, *target, false);
        break;
      case Assembler::equal: {
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jcc(Assembler::equal, *target, false);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16943
// Jump Direct Conditional - using signed and unsigned comparison.
// Conditional branch for If on rFlagsRegUCFE through a cmpOpUCFE; always
// emitted in its long form with a declared size of 6 bytes.
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su $labl" %}
  size(6);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cond, *target, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16958
16959 // ============================================================================
16960 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary
16961 // superklass array for an instance of the superklass. Set a hidden
16962 // internal cache on a hit (cache is checked with exposed code in
16963 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
16964 // encoding ALSO sets flags.
16965
// Linear-scan PartialSubtypeCheck, selected only when
// UseSecondarySupersTable is off. Operands are pinned to fixed registers
// (result in rdi, sub in rsi, super in rax); rcx and the flags are killed.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr) %{
  predicate(!UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100); // slightly larger than the next version
  format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s miss\t\t# Missed: rdi not-zero\n\t"
            "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq $result, $result\t\t Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Falling through means a hit: report it by zeroing $result.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
16999
17000 // ============================================================================
17001 // Two versions of hashtable-based partialSubtypeCheck, both used when
17002 // we need to search for a super class in the secondary supers array.
17003 // The first is used when we don't know _a priori_ the class being
17004 // searched for. The second, far more common, is used when we do know:
17005 // this is used for instanceof, checkcast, and any case where C2 can
17006 // determine it by constant propagation.
17007
// Table-based PartialSubtypeCheck for a superclass held in a register.
// Selected only when UseSecondarySupersTable is on. Needs four fixed
// temporaries (rdx, rcx, rbx, r11) and kills the flags.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr) %{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(1000);
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode %{
    __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register,
                                         $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register,
                                         $result$$Register);
  %}

  ins_pipe(pipe_slow);
%}
17026
// Table-based PartialSubtypeCheck for a superclass that is also available as
// a constant (matched as a Binary of the register and the constant).
// The constant's hash slot is computed when the code is emitted; the lookup
// is then either inlined or done through a call to the stub for that slot,
// depending on InlineSecondarySupersTest.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr) %{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  ins_cost(700); // smaller than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}

  ins_encode %{
    u1 slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(slot)));
    } else {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register,
                                             $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register,
                                             $result$$Register,
                                             slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
17051
17052 // ============================================================================
17053 // Branch Instructions -- short offset versions
17054 //
17055 // These instructions are used to replace jumps of a long offset (the default
17056 // match) with jumps of a shorter offset. These instructions are all tagged
17057 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17058 // match rules in general matching. Instead, the ADLC generates a conversion
17059 // method in the MachNode which can be used to do in-place replacement of the
17060 // long variant with the shorter variant. The compiler will determine if a
17061 // branch can be taken by the is_short_branch_offset() predicate in the machine
17062 // specific code section of the file.
17063
// Jump Direct - Label defines a relative address from JMP+1.
// Short-offset replacement for jmpDir: a 2-byte jmpb, tagged
// ins_short_branch.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
17079
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Short-offset replacement for jmpCon: a 2-byte jccb, tagged
// ins_short_branch.
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17095
// Jump Direct Conditional - Label defines a relative address from Jcc+1.
// Short-offset replacement for jmpLoopEnd: a 2-byte jccb, tagged
// ins_short_branch.
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17111
// Jump Direct Conditional - using unsigned comparison.
// Short-offset replacement for jmpConU: a 2-byte jccb, tagged
// ins_short_branch.
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17127
// Short-offset replacement for jmpConUCF: a 2-byte jccb, tagged
// ins_short_branch.
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17142
// Short-offset replacement for jmpConUCF2: the same two-branch sequences
// built from 2-byte jccb instructions (declared size 4), tagged
// ins_short_branch. Only equal / notEqual are supported:
//   equal:    skip over the branch on parity, otherwise branch on equal.
//   notEqual: branch to the target on parity, then on notEqual.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s $labl\n\t"
      $$emit$$"j$cop,u,s $labl"
    } else {
      $$emit$$"jp,u,s done\n\t"
      $$emit$$"j$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* target = $labl$$label;
    if ($cop$$cmpcode == Assembler::equal) {
      Label skip;
      __ jccb(Assembler::parity, skip);
      __ jccb(Assembler::equal, *target);
      __ bind(skip);
    } else if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17176
// Jump Direct Conditional - using signed and unsigned comparison.
// Short-offset replacement for jmpConUCFE: a 2-byte jccb, tagged
// ins_short_branch.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,sus $labl" %}
  size(2);
  ins_encode %{
    Label* target = $labl$$label;
    const Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17192
17193 // ============================================================================
17194 // inlined locking and unlocking
17195
// Inlined monitor enter: matches FastLock and leaves its outcome in the
// flags. The box is pinned to rbx and is consumed and killed; rax and one
// more pointer register are used as temporaries.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp)
%{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);

  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register,
                 $rax_reg$$Register, $tmp$$Register,
                 r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17206
// Inlined monitor exit: matches FastUnlock and leaves its outcome in the
// flags. The second input is pinned to rax and is consumed and killed; one
// more pointer register is used as a temporary.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp)
%{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);

  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register,
                   $tmp$$Register,
                   r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17217
17218
17219 // ============================================================================
17220 // Safepoint Instructions
// Safepoint poll: a testl of rax against the word addressed by $poll, marked
// with a poll_type relocation. The flags are killed.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so the emitted bytes can be checked.
    address poll_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(poll_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17237
// MaskAll from a long register into an opmask register; the mask length is
// the vector length of this node.
instruct mask_all_evexL(kReg dst, rRegL src)
%{
  match(Set dst (MaskAll src));

  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int lanes = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lanes);
  %}
  ins_pipe( pipe_slow );
%}
17247
// MaskAll from an int register for vectors longer than 32 elements: the int
// is first widened into a long temporary with movslq, then used as the mask
// source.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp)
%{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);

  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int lanes = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, lanes);
  %}
  ins_pipe( pipe_slow );
%}
17260
17261 // ============================================================================
17262 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: any change to this code must be mirrored in the corresponding
//       ret_addr_offset() and compute_padding() functions.
// Encoding steps, in order: clear_avx, Java_Static_Call, call_epilog.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17277
// Call Java Dynamic Instruction
// Note: any change to this code must be mirrored in the corresponding
//       ret_addr_offset() and compute_padding() functions.
// Encoding steps, in order: clear_avx, Java_Dynamic_Call, call_epilog.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
17293
// Call Runtime Instruction
// Encoding: clear_avx followed by Java_To_Runtime.
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);
  ins_cost(300);

  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17305
// Leaf runtime call (no safepoint).
// Encoding: clear_avx followed by Java_To_Runtime.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);
  ins_cost(300);

  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17317
// Leaf runtime call (no safepoint) that passes vector arguments.
// Unlike the other leaf-call rules in this section, the encoding does not
// include clear_avx; only Java_To_Runtime is emitted.
instruct CallLeafDirectVector(method meth) %{
  match(CallLeafVector);
  effect(USE meth);
  ins_cost(300);

  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17329
// Leaf runtime call (no safepoint), CallLeafNoFP flavour.
// Encoding: clear_avx followed by Java_To_Runtime.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);
  ins_cost(300);

  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17341
// Return Instruction
// Removes the return address and jumps to it.
// Note: the encoding below emits only the ret itself; this rule does not
// generate any trailing nop.
instruct Ret() %{
  match(Return);

  format %{ "ret" %}
  ins_pipe(pipe_jmp);
  ins_encode %{
    __ ret(0);
  %}
%}
17356
// Tail Call: jump from a runtime stub to Java code (also known as an
// 'interprocedural jump'). The jump target will eventually return to the
// caller, so the return address is left in place here; TailJump below is the
// variant that removes it.
// rbp must not be used for 'jump_target': a MachEpilogNode has already been
// emitted just above the TailCall and has reset rbp to the caller state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);

  format %{ "jmp $jump_target\t# rbx holds method" %}
  ins_encode %{
    Register target = $jump_target$$Register;
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
17374
// Tail Jump: pop the return address (into rdx), then jump to the target.
// Contrast with TailCall above, which leaves the return address around.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost(300);

  format %{ "popq rdx\t# pop return address\n\t"
            "jmp $jump_target" %}
  ins_encode %{
    Register target = $jump_target$$Register;
    __ popq(as_Register(RDX_enc));
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
17390
// Forward exception: jump to the StubRoutines forward-exception entry.
instruct ForwardExceptionjmp() %{
  match(ForwardException);

  format %{ "jmp forward_exception_stub" %}
  ins_pipe(pipe_jmp);
  ins_encode %{
    RuntimeAddress stub(StubRoutines::forward_exception_entry());
    __ jump(stub, noreg);
  %}
%}
17402
// Create exception oop: the oop is produced by stack-crawling runtime code and
// is set up just prior to jumping to this handler, so it is already available
// (in rax) when the handler runs. This rule therefore emits no code.
instruct CreateException(rax_RegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  format %{ "# exception oop is in rax; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
17416
// Rethrow exception:
// The exception oop arrives in the first argument position; control is then
// transferred with a JUMP (not a call) to the rethrow stub.
instruct RethrowException() %{
  match(Rethrow);

  format %{ "jmp rethrow_stub" %}
  ins_pipe(pipe_jmp);
  ins_encode %{
    RuntimeAddress stub(OptoRuntime::rethrow_stub());
    __ jump(stub, noreg);
  %}
%}
17431
17432 // ============================================================================
// The name of this rule is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type for it.
// The thread-local pointer already lives in R15, so nothing is emitted.
instruct tlsLoadP(r15_RegP dst)
%{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  format %{ "# TLS is in R15" %}
  size(0);
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17445
// AddF, register-register, used when UseAVX == 0: dst is both the first input
// and the result. Emits addss.
instruct addF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));
  ins_cost(150);

  format %{ "addss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17457
// AddF with the second input folded from a LoadF memory operand, used when
// UseAVX == 0. Emits addss with a memory source.
instruct addF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));
  ins_cost(150);

  format %{ "addss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17469
// AddF with a float constant operand read from the constant table, used when
// UseAVX == 0. Emits addss against the constant's address.
instruct addF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  ins_cost(150);

  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17480
// AddF, three-operand form used when UseAVX > 0: dst = src1 + src2.
// Emits vaddss.
instruct addF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));
  ins_cost(150);

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17492
// AddF, UseAVX > 0 form with src2 folded from a LoadF memory operand.
// Emits vaddss with a memory source.
instruct addF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vaddss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17504
// AddF, UseAVX > 0 form with a float constant read from the constant table.
// Emits vaddss against the constant's address.
instruct addF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));
  ins_cost(150);

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17516
// AddD, register-register, used when UseAVX == 0: dst is both the first input
// and the result. Emits addsd.
instruct addD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));
  ins_cost(150);

  format %{ "addsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17528
// AddD with the second input folded from a LoadD memory operand, used when
// UseAVX == 0. Emits addsd with a memory source.
instruct addD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "addsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17540
// AddD with a double constant operand read from the constant table, used when
// UseAVX == 0. Emits addsd against the constant's address.
instruct addD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  ins_cost(150);

  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17551
// AddD, three-operand form used when UseAVX > 0: dst = src1 + src2.
// Emits vaddsd.
instruct addD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));
  ins_cost(150);

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vaddsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17563
// AddD, UseAVX > 0 form with src2 folded from a LoadD memory operand.
// Emits vaddsd with a memory source.
instruct addD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vaddsd(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17575
// AddD, UseAVX > 0 form with a double constant read from the constant table.
// Emits vaddsd against the constant's address.
instruct addD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));
  ins_cost(150);

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vaddsd(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17587
// SubF, register-register, used when UseAVX == 0: dst is both the minuend and
// the result. Emits subss.
instruct subF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));
  ins_cost(150);

  format %{ "subss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17599
// SubF with the subtrahend folded from a LoadF memory operand, used when
// UseAVX == 0. Emits subss with a memory source.
instruct subF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));
  ins_cost(150);

  format %{ "subss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17611
// SubF with a float constant subtrahend read from the constant table, used
// when UseAVX == 0. Emits subss against the constant's address.
instruct subF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  ins_cost(150);

  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17622
// SubF, three-operand form used when UseAVX > 0: dst = src1 - src2.
// Emits vsubss.
instruct subF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));
  ins_cost(150);

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17634
// SubF, UseAVX > 0 form with src2 folded from a LoadF memory operand.
// Emits vsubss with a memory source.
instruct subF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vsubss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17646
// SubF, UseAVX > 0 form with a float constant read from the constant table.
// Emits vsubss against the constant's address.
instruct subF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));
  ins_cost(150);

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17658
// SubD, register-register, used when UseAVX == 0: dst is both the minuend and
// the result. Emits subsd.
instruct subD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));
  ins_cost(150);

  format %{ "subsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17670
// SubD with the subtrahend folded from a LoadD memory operand, used when
// UseAVX == 0. Emits subsd with a memory source.
instruct subD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "subsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17682
// SubD with a double constant subtrahend read from the constant table, used
// when UseAVX == 0. Emits subsd against the constant's address.
instruct subD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  ins_cost(150);

  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17693
// SubD, three-operand form used when UseAVX > 0: dst = src1 - src2.
// Emits vsubsd.
instruct subD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));
  ins_cost(150);

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vsubsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17705
// SubD, UseAVX > 0 form with src2 folded from a LoadD memory operand.
// Emits vsubsd with a memory source.
instruct subD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vsubsd(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17717
// SubD, UseAVX > 0 form with a double constant read from the constant table.
// Emits vsubsd against the constant's address.
instruct subD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));
  ins_cost(150);

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vsubsd(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17729
// MulF, register-register, used when UseAVX == 0: dst is both the first input
// and the result. Emits mulss.
instruct mulF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));
  ins_cost(150);

  format %{ "mulss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17741
// MulF with the second input folded from a LoadF memory operand, used when
// UseAVX == 0. Emits mulss with a memory source.
instruct mulF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));
  ins_cost(150);

  format %{ "mulss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17753
// MulF with a float constant operand read from the constant table, used when
// UseAVX == 0. Emits mulss against the constant's address.
instruct mulF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  ins_cost(150);

  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17764
// MulF, three-operand form used when UseAVX > 0: dst = src1 * src2.
// Emits vmulss.
instruct mulF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));
  ins_cost(150);

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vmulss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17776
// MulF, UseAVX > 0 form with src2 folded from a LoadF memory operand.
// Emits vmulss with a memory source.
instruct mulF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vmulss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17788
// MulF, UseAVX > 0 form with a float constant read from the constant table.
// Emits vmulss against the constant's address.
instruct mulF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));
  ins_cost(150);

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vmulss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17800
// MulD, register-register, used when UseAVX == 0: dst is both the first input
// and the result. Emits mulsd.
instruct mulD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));
  ins_cost(150);

  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17812
// MulD with the second input folded from a LoadD memory operand, used when
// UseAVX == 0. Emits mulsd with a memory source.
instruct mulD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(150);

  format %{ "mulsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17824
// MulD with a double constant operand read from the constant table, used when
// UseAVX == 0. Emits mulsd against the constant's address.
instruct mulD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  ins_cost(150);

  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17835
// MulD, three-operand form used when UseAVX > 0: dst = src1 * src2.
// Emits vmulsd.
instruct mulD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));
  ins_cost(150);

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vmulsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17847
// MulD, UseAVX > 0 form with src2 folded from a LoadD memory operand.
// Emits vmulsd with a memory source.
instruct mulD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vmulsd(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17859
// MulD, UseAVX > 0 form with a double constant read from the constant table.
// Emits vmulsd against the constant's address.
instruct mulD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));
  ins_cost(150);

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vmulsd(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17871
// DivF, register-register, used when UseAVX == 0: dst is both the dividend
// and the result. Emits divss.
instruct divF_reg(regF dst, regF src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));
  ins_cost(150);

  format %{ "divss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ divss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17883
// DivF with the divisor folded from a LoadF memory operand, used when
// UseAVX == 0. Emits divss with a memory source.
instruct divF_mem(regF dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));
  ins_cost(150);

  format %{ "divss $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17895
// DivF with a float constant divisor read from the constant table, used when
// UseAVX == 0. Emits divss against the constant's address.
instruct divF_imm(regF dst, immF con)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));
  ins_cost(150);

  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17906
// DivF, three-operand form used when UseAVX > 0: dst = src1 / src2.
// Emits vdivss.
instruct divF_reg_reg(regF dst, regF src1, regF src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));
  ins_cost(150);

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vdivss(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17918
// DivF, UseAVX > 0 form with src2 folded from a LoadF memory operand.
// Emits vdivss with a memory source.
instruct divF_reg_mem(regF dst, regF src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));
  ins_cost(150);

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vdivss(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17930
// DivF, UseAVX > 0 form with a float constant read from the constant table.
// Emits vdivss against the constant's address.
instruct divF_reg_imm(regF dst, regF src, immF con)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));
  ins_cost(150);

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vdivss(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17942
// DivD, register-register, used when UseAVX == 0: dst is both the dividend
// and the result. Emits divsd.
instruct divD_reg(regD dst, regD src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));
  ins_cost(150);

  format %{ "divsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ divsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17954
// DivD with the divisor folded from a LoadD memory operand, used when
// UseAVX == 0. Emits divsd with a memory source.
instruct divD_mem(regD dst, memory src)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));
  ins_cost(150);

  format %{ "divsd $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17966
// DivD with a double constant divisor read from the constant table, used when
// UseAVX == 0. Emits divsd against the constant's address.
instruct divD_imm(regD dst, immD con)
%{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));
  ins_cost(150);

  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17977
// DivD, three-operand form used when UseAVX > 0: dst = src1 / src2.
// Emits vdivsd.
instruct divD_reg_reg(regD dst, regD src1, regD src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ vdivsd(res, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17989
// DivD, UseAVX > 0 form with src2 folded from a LoadD memory operand.
// Emits vdivsd with a memory source.
instruct divD_reg_mem(regD dst, regD src1, memory src2)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));
  ins_cost(150);

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src1$$XMMRegister;
    __ vdivsd(res, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18001
// DivD, UseAVX > 0 form with a double constant read from the constant table.
// Emits vdivsd against the constant's address.
instruct divD_reg_imm(regD dst, regD src, immD con)
%{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));
  ins_cost(150);

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister lhs = $src$$XMMRegister;
    __ vdivsd(res, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18013
// AbsF in place, used when UseAVX == 0: andps of dst with the
// float_signmask() constant.
instruct absF_reg(regF dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));

  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ andps(val, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18024
// AbsF, UseAVX > 0 form: vandps of src with the float_signmask() constant into
// dst, issued with the 128-bit vector length encoding.
instruct absF_reg_reg(vlRegF dst, vlRegF src)
%{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));

  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister val = $src$$XMMRegister;
    int enc = Assembler::AVX_128bit;
    __ vandps(res, val, ExternalAddress(float_signmask()), enc);
  %}
  ins_pipe(pipe_slow);
%}
18037
// AbsD in place, used when UseAVX == 0: andpd of dst with the
// double_signmask() constant.
instruct absD_reg(regD dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));

  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ andpd(val, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18049
// AbsD, UseAVX > 0 form: vandpd of src with the double_signmask() constant
// into dst, issued with the 128-bit vector length encoding.
instruct absD_reg_reg(vlRegD dst, vlRegD src)
%{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));

  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister val = $src$$XMMRegister;
    int enc = Assembler::AVX_128bit;
    __ vandpd(res, val, ExternalAddress(double_signmask()), enc);
  %}
  ins_pipe(pipe_slow);
%}
18063
// NegF in place, used when UseAVX == 0: xorps of dst with the
// float_signflip() constant.
instruct negF_reg(regF dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));

  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ xorps(val, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18074
// NegF, UseAVX > 0 form: vnegatess of src against the float_signflip()
// constant, result in dst.
instruct negF_reg_reg(vlRegF dst, vlRegF src)
%{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));

  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister val = $src$$XMMRegister;
    __ vnegatess(res, val, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18086
// NegD in place, used when UseAVX == 0: xorpd of dst with the
// double_signflip() constant.
instruct negD_reg(regD dst)
%{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));

  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ xorpd(val, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18098
// NegD, UseAVX > 0 form: vnegatesd of src against the double_signflip()
// constant, result in dst.
instruct negD_reg_reg(vlRegD dst, vlRegD src)
%{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));

  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister val = $src$$XMMRegister;
    __ vnegatesd(res, val, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18111
// For best performance sqrtss wants its destination register to be
// pre-initialized. For that reason only the in-place rule is provided: the
// input is already in dst and sqrtss uses dst as both source and destination.
instruct sqrtF_reg(regF dst)
%{
  match(Set dst (SqrtF dst));

  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ sqrtss(val, val);
  %}
  ins_pipe(pipe_slow);
%}
18122
// For best performance sqrtsd wants its destination register to be
// pre-initialized. For that reason only the in-place rule is provided: the
// input is already in dst and sqrtsd uses dst as both source and destination.
instruct sqrtD_reg(regD dst)
%{
  match(Set dst (SqrtD dst));

  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    XMMRegister val = $dst$$XMMRegister;
    __ sqrtsd(val, val);
  %}
  ins_pipe(pipe_slow);
%}
18133
// ConvF2HF into a general-purpose register: delegates to flt_to_flt16, which
// is handed an XMM temporary in addition to the source and destination.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp)
%{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);
  ins_cost(125);

  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
  ins_encode %{
    Register    res     = $dst$$Register;
    XMMRegister val     = $src$$XMMRegister;
    XMMRegister scratch = $tmp$$XMMRegister;
    __ flt_to_flt16(res, val, scratch);
  %}
  ins_pipe( pipe_slow );
%}
18144
// ConvF2HF fused with a StoreC: the converted value is written straight to
// memory by a masked evcvtps2ph. Requires UseAVX > 2 and AVX512VL.
// The opmask temporary is loaded with the value 0x1 (via the GPR temporary)
// before the store is issued.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp)
%{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  match(Set mem (StoreC mem (ConvF2HF src)));
  effect(TEMP ktmp, TEMP rtmp);

  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  bits = $rtmp$$Register;
    KRegister mask = $ktmp$$KRegister;
    __ movl(bits, 0x1);
    __ kmovwl(mask, bits);
    __ evcvtps2ph($mem$$Address, mask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
18157
// VectorCastF2HF, register to register: vcvtps2ph with immediate 0x04, using
// the vector length encoding derived from the source operand.
instruct vconvF2HF(vec dst, vec src)
%{
  match(Set dst (VectorCastF2HF src));

  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister val = $src$$XMMRegister;
    int enc = vector_length_encoding(this, $src);
    __ vcvtps2ph(res, val, 0x04, enc);
  %}
  ins_pipe( pipe_slow );
%}
18167
// VectorCastF2HF fused with a StoreVector of at least 16 bytes: vcvtps2ph
// (immediate 0x04) writes directly to memory, using the vector length encoding
// derived from the source operand.
instruct vconvF2HF_mem_reg(memory mem, vec src)
%{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));

  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    XMMRegister val = $src$$XMMRegister;
    int enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, val, 0x04, enc);
  %}
  ins_pipe( pipe_slow );
%}
18178
// ConvHF2F from a general-purpose register: delegates to flt16_to_flt.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src)
%{
  match(Set dst (ConvHF2F src));

  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister res  = $dst$$XMMRegister;
    Register    half = $src$$Register;
    __ flt16_to_flt(res, half);
  %}
  ins_pipe( pipe_slow );
%}
18187
// VectorCastHF2F with the input folded from a LoadVector: vcvtph2ps reads its
// source from memory, using this node's vector length encoding.
instruct vconvHF2F_reg_mem(vec dst, memory mem)
%{
  match(Set dst (VectorCastHF2F (LoadVector mem)));

  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vcvtph2ps(res, $mem$$Address, enc);
  %}
  ins_pipe( pipe_slow );
%}
18197
// VectorCastHF2F, register to register: vcvtph2ps using this node's vector
// length encoding.
instruct vconvHF2F(vec dst, vec src)
%{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);

  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    XMMRegister res = $dst$$XMMRegister;
    XMMRegister val = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vcvtph2ps(res, val, enc);
  %}
  ins_pipe( pipe_slow );
%}
18208
18209 // ---------------------------------------- VectorReinterpret ------------------------------------
// VectorReinterpret of a predicate mask whose lane count equals that of its
// input: source and destination are the same operand, so nothing is emitted.
instruct reinterpret_mask(kReg dst)
%{
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1)));
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);

  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // Intentionally empty: no instructions are needed.
  %}
  ins_pipe( pipe_slow );
%}
18221
// VectorReinterpret between predicate masks: T_SHORT lanes in, T_BYTE lanes
// out (UseAVX > 2). The lane counts differ, while the byte sizes are asserted
// to be equal in the encoding. The mask is expanded into a vector temporary
// (evpmovm2w) and then re-read as a byte mask (evpmovb2m).
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp)
%{
  predicate(UseAVX > 2 &&
            Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    int src_bytes = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_bytes == dst_bytes , "src and dst size mismatch");
    int enc = vector_length_encoding(src_bytes);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2w(lanes, $src$$KRegister, enc);
    __ evpmovb2m($dst$$KRegister, lanes, enc);
  %}
  ins_pipe( pipe_slow );
%}
18241
// VectorReinterpret between predicate masks: T_INT or T_FLOAT lanes in, T_BYTE
// lanes out (UseAVX > 2). The lane counts differ, while the byte sizes are
// asserted to be equal in the encoding. The mask is expanded into a vector
// temporary (evpmovm2d) and then re-read as a byte mask (evpmovb2m).
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp)
%{
  predicate(UseAVX > 2 &&
            Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    int src_bytes = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_bytes == dst_bytes , "src and dst size mismatch");
    int enc = vector_length_encoding(src_bytes);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2d(lanes, $src$$KRegister, enc);
    __ evpmovb2m($dst$$KRegister, lanes, enc);
  %}
  ins_pipe( pipe_slow );
%}
18262
// VectorReinterpret between predicate masks: T_LONG or T_DOUBLE lanes in,
// T_BYTE lanes out (UseAVX > 2). The lane counts differ, while the byte sizes
// are asserted to be equal in the encoding. The mask is expanded into a vector
// temporary (evpmovm2q) and then re-read as a byte mask (evpmovb2m).
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp)
%{
  predicate(UseAVX > 2 &&
            Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE);
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);

  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    int src_bytes = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_bytes = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_bytes == dst_bytes , "src and dst size mismatch");
    int enc = vector_length_encoding(src_bytes);
    XMMRegister lanes = $xtmp$$XMMRegister;
    __ evpmovm2q(lanes, $src$$KRegister, enc);
    __ evpmovb2m($dst$$KRegister, lanes, enc);
  %}
  ins_pipe( pipe_slow );
%}
18283
// VectorReinterpret of a non-mask vector whose size in bytes equals that of
// its input: source and destination are the same operand, so nothing is
// emitted.
instruct reinterpret(vec dst)
%{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);

  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // Intentionally empty: no instructions are needed.
  %}
  ins_pipe( pipe_slow );
%}
18295
// Widening vector reinterpret for the non-AVX case (UseAVX == 0). The
// destination is loaded with the 32-bit or 64-bit mask constant that matches
// the source size and is then ANDed with the source register.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    XMMRegister widened = $dst$$XMMRegister;
    int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes != 4) {
      assert(src_bytes == 8, "");
      __ movdqu(widened, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu(widened, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    // Combine the mask constant with the source bits.
    __ pand(widened, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18318
// AVX widening reinterpret of a 4-byte, non-mask source vector: a single vpand
// of the source with the 32-bit mask constant, written to the destination.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src is 4 bytes, src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    __ vpand(widened, narrow, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}
18332
18333
// AVX widening reinterpret of a non-mask source vector larger than 4 bytes.
// The source is copied with the move instruction that matches its size
// (8, 16 or 32 bytes); any other size is a fatal error.
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src > 4 bytes, src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_bytes == 8) {
      __ movq(to, from);
    } else if (src_bytes == 16) {
      __ movdqu(to, from);
    } else if (src_bytes == 32) {
      __ vmovdqu(to, from);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18352
// Narrowing reinterpret of a non-mask vector: the source is copied with the
// move instruction that matches the destination size (4, 8, 16 or 32 bytes);
// any other size is a fatal error.
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    int dst_bytes = Matcher::vector_length_in_bytes(this);
    if (dst_bytes == 4) {
      __ movfltz(to, from);
    } else if (dst_bytes == 8) {
      __ movq(to, from);
    } else if (dst_bytes == 16) {
      __ movdqu(to, from);
    } else if (dst_bytes == 32) {
      __ vmovdqu(to, from);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18370
18371 // ----------------------------------------------------------------------------------------------------
18372
// Scalar double rounding with an immediate rounding mode (roundsd), register
// source. Without AVX, a destination that differs from the source is zeroed
// with pxor before the roundsd is emitted.
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    if (UseAVX == 0 && result != input) {
      __ pxor(result, result);
    }
    __ roundsd(result, input, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}
18386
// Scalar double rounding of a double constant: roundsd reads its source
// operand from the constant table address of con.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ roundsd(result, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18397
// Packed double rounding for vectors with fewer than 8 lanes, register source
// (vroundpd).
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vroundpd(rounded, input, $rmode$$constant, enc);
  %}
  ins_pipe( pipe_slow );
%}
18409
// Packed double rounding for 8-lane vectors, register source. Emits
// vrndscalepd with the 512-bit vector length.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    __ vrndscalepd(rounded, input, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18420
// Packed double rounding for vectors with fewer than 8 lanes where the input
// is a vector load folded into vroundpd as a memory operand.
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    int enc = vector_length_encoding(this);
    __ vroundpd(rounded, $mem$$Address, $rmode$$constant, enc);
  %}
  ins_pipe( pipe_slow );
%}
18432
// Packed double rounding for 8-lane vectors where the input is a vector load
// folded into vrndscalepd as a memory operand (512-bit vector length).
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ vrndscalepd(rounded, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
18443
// OnSpinWait node: emits a single pause instruction.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    // Spin-wait hint; no operands are involved.
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18457
// Fused multiply-add on doubles: c = a * b + c.
// The addend register c is also the result register.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    __ fmad(accum, $a$$XMMRegister, $b$$XMMRegister, accum);
  %}
  ins_pipe( pipe_slow );
%}
18469
// Fused multiply-add on floats: c = a * b + c.
// The addend register c is also the result register.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    __ fmaf(accum, $a$$XMMRegister, $b$$XMMRegister, accum);
  %}
  ins_pipe( pipe_slow );
%}
18481
18482 // ====================VECTOR INSTRUCTIONS=====================================
18483
18484 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder move from a vec operand to a legVec operand. It has an empty
// format and its encoding must never run (ShouldNotReachHere).
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    // Reaching this encoding is a fatal error.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18493
// Placeholder move from a legVec operand to a vec operand. It has an empty
// format and its encoding must never run (ShouldNotReachHere).
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    // Reaching this encoding is a fatal error.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18502
18503 // ============================================================================
18504
// Generic vector load: delegates to load_vector with the element type and the
// vector size in bytes of this node.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    XMMRegister loaded = $dst$$XMMRegister;
    __ load_vector(elem_bt, loaded, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}
18516
// Generic vector store: picks the store instruction from the size in bytes of
// the stored vector (4, 8, 16, 32 or 64); any other size is a fatal error.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    XMMRegister value = $src$$XMMRegister;
    int size_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (size_in_bytes == 4) {
      __ movdl($mem$$Address, value);
    } else if (size_in_bytes == 8) {
      __ movq($mem$$Address, value);
    } else if (size_in_bytes == 16) {
      __ movdqu($mem$$Address, value);
    } else if (size_in_bytes == 32) {
      __ vmovdqu($mem$$Address, value);
    } else if (size_in_bytes == 64) {
      __ evmovdqul($mem$$Address, value, Assembler::AVX_512bit);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
18534
18535 // ---------------------------------------- Gather ------------------------------------
18536
18537 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18538
// Gather of non-subword elements for vectors of at most 32 bytes when
// AVX512VL is not available. The vector temp mask is set to all ones with
// vpcmpeqd, the base address is materialized into tmp, and vgather does the
// loads.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(lane_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    XMMRegister all_lanes = $mask$$XMMRegister;
    Register base = $tmp$$Register;
    // Comparing a register with itself for equality sets every bit.
    __ vpcmpeqd(all_lanes, all_lanes, all_lanes, enc);
    __ lea(base, $mem$$Address);
    __ vgather(lane_bt, $dst$$XMMRegister, base, $idx$$XMMRegister, all_lanes, enc);
  %}
  ins_pipe( pipe_slow );
%}
18555
18556
// Gather of non-subword elements using an opmask register. Selected when
// AVX512VL is available or the vector is 64 bytes long. The opmask temp ktmp
// is set with kxnorwl of the register with itself, the base address is
// materialized into tmp, and evgather does the loads.
//
// The format string now prints the allocated opmask temp through the ktmp
// operand; previously it printed the literal text "ktmp".
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // XNOR of a register with itself yields all ones.
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18572
// Masked gather of non-subword elements using an opmask register. Selected
// when AVX512VL is available or the vector is 64 bytes long. The destination
// is zeroed first and the gather runs under a copy of the mask.
//
// The format string now prints the allocated opmask temp through the ktmp
// operand; previously it printed the literal text "ktmp".
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary first.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Zero the destination before the gather.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18593
// Unmasked gather of subword elements for vectors of at most 8 bytes. The
// base address is materialized into tmp and vgather8b does the loads, using
// rtmp as a scratch register.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    Register base = $tmp$$Register;
    __ lea(base, $mem$$Address);
    __ vgather8b(lane_bt, $dst$$XMMRegister, base, $idx_base$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18607
// Unmasked gather of subword elements for vectors larger than 8 bytes. The
// base address goes into tmp and the index base is copied into
// idx_base_temp; vgather_subword then receives that copy, with noreg passed
// in the two mask-related argument positions.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int lane_count = Matcher::vector_length(this);
    int enc = vector_length_encoding(this);
    Register base = $tmp$$Register;
    Register index_cursor = $idx_base_temp$$Register;
    __ lea(base, $mem$$Address);
    __ movptr(index_cursor, $idx_base$$Register);
    __ vgather_subword(lane_bt, $dst$$XMMRegister, base, index_cursor, noreg,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, noreg, $length$$Register, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
18625
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512BW is available. mask_idx is cleared, the base address goes into tmp,
// the opmask is copied into the general register rtmp2, and vgather8b_masked
// does the loads.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    Register mask_cursor = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(mask_cursor, mask_cursor);
    __ lea(base, $mem$$Address);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather8b_masked(lane_bt, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, mask_cursor, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18641
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512BW is available. mask_idx is cleared, the base address goes into tmp,
// the index base is copied into idx_base_temp, the opmask is copied into the
// general register rtmp2, and vgather_subword does the loads.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int lane_count = Matcher::vector_length(this);
    int enc = vector_length_encoding(this);
    Register mask_cursor = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register index_cursor = $idx_base_temp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(mask_cursor, mask_cursor);
    __ lea(base, $mem$$Address);
    __ movptr(index_cursor, $idx_base$$Register);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather_subword(lane_bt, $dst$$XMMRegister, base, index_cursor, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, mask_cursor, $length$$Register, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
18661
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512VL+BW is not available and the mask is a vector register. The mask is
// turned into a bit mask in rtmp2 with vpmovmskb; for T_SHORT elements every
// other bit is then extracted with pextl (selector 0x55555555). mask_idx is
// cleared afterwards and vgather8b_masked does the loads.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int enc = vector_length_encoding(this);
    Register mask_cursor = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ lea(base, $mem$$Address);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, enc);
    if (lane_bt == T_SHORT) {
      // mask_cursor briefly holds the bit selector before it is cleared below.
      __ movl(mask_cursor, 0x55555555);
      __ pextl(mask_bits, mask_bits, mask_cursor);
    }
    __ xorl(mask_cursor, mask_cursor);
    __ vgather8b_masked(lane_bt, $dst$$XMMRegister, base, $idx_base$$Register, mask_bits, mask_cursor, $rtmp$$Register, enc);
  %}
  ins_pipe( pipe_slow );
%}
18681
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512VL+BW is not available and the mask is a vector register. The mask is
// turned into a bit mask in rtmp2 with vpmovmskb; for T_SHORT elements every
// other bit is then extracted with pextl (selector 0x55555555). mask_idx is
// cleared afterwards and vgather_subword does the loads.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    int lane_count = Matcher::vector_length(this);
    int enc = vector_length_encoding(this);
    Register mask_cursor = $mask_idx$$Register;
    Register base = $tmp$$Register;
    Register index_cursor = $idx_base_temp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ lea(base, $mem$$Address);
    __ movptr(index_cursor, $idx_base$$Register);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, enc);
    if (lane_bt == T_SHORT) {
      // mask_cursor briefly holds the bit selector before it is cleared below.
      __ movl(mask_cursor, 0x55555555);
      __ pextl(mask_bits, mask_bits, mask_cursor);
    }
    __ xorl(mask_cursor, mask_cursor);
    __ vgather_subword(lane_bt, $dst$$XMMRegister, base, index_cursor, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, mask_cursor, $length$$Register, lane_count, enc);
  %}
  ins_pipe( pipe_slow );
%}
18705
18706 // ====================Scatter=======================================
18707
18708 // Scatter INT, LONG, FLOAT, DOUBLE
18709
// Unmasked scatter store of non-subword elements (UseAVX > 2). The opmask
// temp ktmp is loaded from the all-bits-set constant, the base address is
// materialized into tmp, and evscatter does the stores.
//
// The format string now prints the allocated opmask temp through the ktmp
// operand; previously it printed the fixed text "k2", which is not
// necessarily the register the allocator assigned.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18728
// Masked scatter store of non-subword elements. The mask is copied into the
// opmask temp ktmp, the base address is materialized into tmp, and evscatter
// does the stores under that copy.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $src);
    int enc = vector_length_encoding(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(lane_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    KRegister live_mask = $ktmp$$KRegister;
    Register base = $tmp$$Register;
    // The scatter instruction partially updates the opmask register it is
    // predicated on, hence it works on a copy of the mask operand.
    __ kmovwl(live_mask, $mask$$KRegister);
    __ lea(base, $mem$$Address);
    __ evscatter(lane_bt, base, $idx$$XMMRegister, live_mask, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
18746
18747 // ====================REPLICATE=======================================
18748
// Replicate a byte held in a general register into every lane of a vector.
//  - UseAVX < 2: movdl followed by punpcklbw / pshuflw, plus punpcklqdq for
//    16 lanes.
//  - UseAVX >= 2: evpbroadcastb straight from the general register when the
//    vector has 64 lanes or AVX512VL+BW is available, otherwise movdl followed
//    by vpbroadcastb.
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint lanes = Matcher::vector_length(this);
    XMMRegister bcast = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (UseAVX < 2) {
      assert(UseAVX < 2, "");
      __ movdl(bcast, scalar);
      __ punpcklbw(bcast, bcast);
      __ pshuflw(bcast, bcast, 0x00);
      if (lanes >= 16) {
        assert(lanes == 16, "");
        __ punpcklqdq(bcast, bcast);
      }
    } else {
      int enc = vector_length_encoding(this);
      if (lanes != 64 && !VM_Version::supports_avx512vlbw()) {
        __ movdl(bcast, scalar);
        __ vpbroadcastb(bcast, bcast, enc);
      } else { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb(bcast, scalar, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18778
// Replicate a byte loaded from memory into every lane (UseAVX >= 2): the load
// is folded into vpbroadcastb as a memory operand.
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    XMMRegister bcast = $dst$$XMMRegister;
    __ vpbroadcastb(bcast, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18789
18790 // ====================ReplicateS=======================================
18791
// Replicate a short held in a general register into every lane of a vector.
//  - UseAVX < 2: movdl followed by pshuflw, plus punpcklqdq for 8 lanes.
//  - UseAVX >= 2: evpbroadcastw straight from the general register when the
//    vector has 32 lanes or AVX512VL+BW is available, otherwise movdl followed
//    by vpbroadcastw.
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint lanes = Matcher::vector_length(this);
    int enc = vector_length_encoding(this);
    XMMRegister bcast = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (UseAVX < 2) {
      assert(UseAVX < 2, "");
      __ movdl(bcast, scalar);
      __ pshuflw(bcast, bcast, 0x00);
      if (lanes >= 8) {
        assert(lanes == 8, "");
        __ punpcklqdq(bcast, bcast);
      }
    } else if (lanes != 32 && !VM_Version::supports_avx512vlbw()) {
      __ movdl(bcast, scalar);
      __ vpbroadcastw(bcast, bcast, enc);
    } else { // AVX512VL for <512bit operands
      assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
      __ evpbroadcastw(bcast, scalar, enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
18819
// Replicate an immH constant into every lane: the constant is moved into the
// general temp rtmp and broadcast with evpbroadcastw. The encoding asserts
// AVX512-FP16 support and a T_SHORT element type.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    assert(VM_Version::supports_avx512_fp16() && lane_bt == T_SHORT, "");
    int enc = vector_length_encoding(this);
    Register scratch = $rtmp$$Register;
    __ movl(scratch, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18833
// Replicate a value held in a float register into every T_SHORT lane when
// AVX512-FP16 is available: evmovw copies it into the general temp rtmp and
// evpbroadcastw broadcasts it from there.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    Register scratch = $rtmp$$Register;
    __ evmovw(scratch, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18846
// Replicate a short loaded from memory into every lane (UseAVX >= 2): the
// load is folded into vpbroadcastw as a memory operand.
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    XMMRegister bcast = $dst$$XMMRegister;
    __ vpbroadcastw(bcast, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18857
18858 // ====================ReplicateI=======================================
18859
// Replicate an int held in a general register into every lane of a vector.
// With 16 lanes or AVX512VL the value is broadcast straight from the general
// register (evpbroadcastd). Otherwise it is first moved into the destination
// with movdl and then spread with vpbroadcastd (AVX2) or pshufd.
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint lanes = Matcher::vector_length(this);
    int enc = vector_length_encoding(this);
    XMMRegister bcast = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (lanes == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd(bcast, scalar, enc);
    } else {
      // Both remaining paths start by moving the scalar into the vector register.
      __ movdl(bcast, scalar);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastd(bcast, bcast, enc);
      } else {
        __ pshufd(bcast, bcast, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18879
// Replicate an int loaded from memory into every lane of a vector. Uses
// vpbroadcastd with AVX2, vbroadcastss with AVX, and movdl followed by pshufd
// otherwise.
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    XMMRegister bcast = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd(bcast, $mem$$Address, enc);
    } else if (VM_Version::supports_avx()) {
      __ vbroadcastss(bcast, $mem$$Address, enc);
    } else {
      // No broadcast instruction is used here: load, then shuffle lane 0 into all lanes.
      __ movdl(bcast, $mem$$Address);
      __ pshufd(bcast, bcast, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}
18897
// Replicate an int immediate into a non-long integral vector by loading a
// replicated constant (built with vreplicate_imm) from the constant table.
// The replication count passed to vreplicate_imm is 16, 8 or 4 bytes
// (no SSE3, SSE3 without AVX, AVX) divided by the element size.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constant expression below is kept self-contained and
    // does not use locals of this block; presumably it is also evaluated
    // outside of this encoding - confirm before refactoring it.
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    int size_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType lane_bt = Matcher::vector_element_basic_type(this);
    __ load_constant_vector(lane_bt, $dst$$XMMRegister, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18912
// Replicate the int constant zero into a non-long integral vector by XORing
// the destination with itself. vpxor with the vector length encoding is used
// only when EVEX is supported without AVX512VL; pxor is used otherwise.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    XMMRegister zeroed = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(zeroed, zeroed);
    } else {
      __ vpxor(zeroed, zeroed, zeroed, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18928
// Replicate the int constant -1 into a non-long integral vector: delegates to
// vallones with the vector length encoding of this node.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    XMMRegister all_set = $dst$$XMMRegister;
    __ vallones(all_set, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18939
18940 // ====================ReplicateL=======================================
18941
// Replicate a long (8 byte) scalar held in a general register into every lane.
// With 8 lanes or AVX512VL the value is broadcast straight from the general
// register (evpbroadcastq). Otherwise it is first moved into the destination
// with movdq and then spread with vpbroadcastq (AVX2) or punpcklqdq.
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int lanes = Matcher::vector_length(this);
    int enc = vector_length_encoding(this);
    XMMRegister bcast = $dst$$XMMRegister;
    Register scalar = $src$$Register;
    if (lanes == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq(bcast, scalar, enc);
    } else {
      // Both remaining paths start by moving the scalar into the vector register.
      __ movdq(bcast, scalar);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastq(bcast, bcast, enc);
      } else {
        __ punpcklqdq(bcast, bcast);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18962
// Replicate a long loaded from memory into every lane of a vector. Uses
// vpbroadcastq with AVX2, movddup with SSE3, and movq followed by punpcklqdq
// otherwise.
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    XMMRegister bcast = $dst$$XMMRegister;
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq(bcast, $mem$$Address, enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup(bcast, $mem$$Address);
    } else {
      // Load the low quadword, then duplicate it into the high quadword.
      __ movq(bcast, $mem$$Address);
      __ punpcklqdq(bcast, bcast);
    }
  %}
  ins_pipe( pipe_slow );
%}
18980
// Replicate a long (8 byte) immediate into every lane by loading a replicated
// constant (built with vreplicate_imm) from the constant table. The
// replication count passed to vreplicate_imm is 1 with SSE3 and 2 without.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constant expression below is kept self-contained and
    // does not use locals of this block; presumably it is also evaluated
    // outside of this encoding - confirm before refactoring it.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int size_in_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister loaded = $dst$$XMMRegister;
    __ load_constant_vector(T_LONG, loaded, addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18993
// Replicate the long constant zero into every lane by XORing the destination
// with itself. vpxor with the vector length encoding is used only when EVEX is
// supported without AVX512VL; pxor is used otherwise.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    XMMRegister zeroed = $dst$$XMMRegister;
    if (!VM_Version::supports_evex() || VM_Version::supports_avx512vl()) {
      __ pxor(zeroed, zeroed);
    } else {
      __ vpxor(zeroed, zeroed, zeroed, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19008
// Replicate the long constant -1: delegate to the vallones macro for the vector length in use.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    int vec_enc = vector_length_encoding(this);
    __ vallones(dst_xmm, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19019
19020 // ====================ReplicateF=======================================
19021
// Replicate a float held in a register across the vector (AVX path).
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    uint num_lanes = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(this);
    if (num_lanes > 4) {
      if (VM_Version::supports_avx2()) {
        __ vbroadcastss(dst_xmm, src_xmm, vec_enc); // reg-to-reg variant requires AVX2
      } else {
        // Without AVX2 only the 8-lane case is expected: splat the low
        // 128 bits first, then replicate them into the upper half.
        assert(num_lanes == 8, "sanity");
        __ vpermilps(dst_xmm, src_xmm, 0x00, Assembler::AVX_128bit);
        __ vinsertf128_high(dst_xmm, dst_xmm);
      }
    } else {
      // At most four lanes: a single 128-bit permute is enough.
      __ vpermilps(dst_xmm, src_xmm, 0x00, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}
19041
// Replicate a float held in a register across the vector (non-AVX path).
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    // Shuffle selector 0x00 picks source element 0 for every destination element.
    __ pshufd(dst_xmm, src_xmm, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
19051
// Replicate a float loaded from memory across the vector (AVX only).
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    int vec_enc = vector_length_encoding(this);
    __ vbroadcastss(dst_xmm, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19062
// Replicate a float immediate into a vector. The value is placed in the
// constant table and the vector is loaded from there.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    const int vec_bytes = Matcher::vector_length_in_bytes(this);
    // The constant expression is kept literal: it must not refer to locals of this block.
    // Its last argument is 1 with AVX, 2 with SSE3 only, and 4 otherwise.
    InternalAddress table_addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, table_addr, vec_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19076
// Replicate the float constant zero: clear the destination by xor-ing it with itself.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    int vec_enc = vector_length_encoding(this);
    // EVEX-capable CPUs lacking AVX512VL+DQ take the length-encoded vpxor form.
    const bool evex_without_vldq = VM_Version::supports_evex() && !VM_Version::supports_avx512vldq();
    if (evex_without_vldq) {
      __ vpxor(dst_xmm, dst_xmm, dst_xmm, vec_enc);
    } else {
      __ xorps(dst_xmm, dst_xmm);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19091
19092 // ====================ReplicateD=======================================
19093
// Replicate a double (8 bytes) held in a register across the vector (SSE3 and above).
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    uint num_lanes = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(this);
    if (num_lanes > 2) {
      if (VM_Version::supports_avx2()) {
        __ vbroadcastsd(dst_xmm, src_xmm, vec_enc); // reg-to-reg variant requires AVX2
      } else {
        // Without AVX2 only the 4-lane case is expected: duplicate within the
        // low 128 bits first, then replicate them into the upper half.
        assert(num_lanes == 4, "sanity");
        __ movddup(dst_xmm, src_xmm);
        __ vinsertf128_high(dst_xmm, dst_xmm);
      }
    } else {
      // At most two lanes: one movddup covers the whole vector.
      __ movddup(dst_xmm, src_xmm);
    }
  %}
  ins_pipe( pipe_slow );
%}
19114
// Replicate a double held in a register across the vector (below SSE3).
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    // Selector 0x44 chooses dwords 0,1,0,1, i.e. the low quadword twice.
    __ pshufd(dst_xmm, src_xmm, 0x44);
  %}
  ins_pipe( pipe_slow );
%}
19124
// Replicate a double loaded from memory across the vector (SSE3 and above).
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    if (Matcher::vector_length(this) < 4) {
      // Fewer than four lanes: movddup from memory is sufficient.
      __ movddup(dst_xmm, $mem$$Address);
    } else {
      int vec_enc = vector_length_encoding(this);
      __ vbroadcastsd(dst_xmm, $mem$$Address, vec_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
19139
// Replicate a double (8 byte) immediate into a vector. The value is placed in
// the constant table and the vector is loaded from there.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    const int vec_bytes = Matcher::vector_length_in_bytes(this);
    // The constant expression is kept literal: it must not refer to locals of this block.
    // NOTE(review): last argument is presumably the number of copies stored in the table.
    InternalAddress table_addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, table_addr, vec_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19152
// Replicate the double constant zero: clear the destination by xor-ing it with itself.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    const XMMRegister dst_xmm = $dst$$XMMRegister;
    int vec_enc = vector_length_encoding(this);
    // EVEX-capable CPUs lacking AVX512VL+DQ take the length-encoded vpxor form.
    const bool evex_without_vldq = VM_Version::supports_evex() && !VM_Version::supports_avx512vldq();
    if (evex_without_vldq) {
      __ vpxor(dst_xmm, dst_xmm, dst_xmm, vec_enc);
    } else {
      __ xorps(dst_xmm, dst_xmm);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19167
19168 // ====================VECTOR INSERT=======================================
19169
// Insert a general-register value into one integral element of a vector
// shorter than 32 bytes. dst is both the input vector and the result.
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    const XMMRegister vec_xmm = $dst$$XMMRegister;
    const Register new_value = $val$$Register;
    BasicType lane_type = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(lane_type), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // The macro assembler selects the insert instruction from the element type.
    __ insert(lane_type, vec_xmm, new_value, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19187
// Insert a general-register value into one integral element of a 32-byte vector.
// The 128-bit half that holds the element is extracted into vtmp, patched there,
// and then merged with src to produce dst.
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt); // elements per 16-byte lane
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // x_idx: element position inside its 16-byte lane; y_idx: which lane (0 or 1).
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19210
// Insert a general-register value into one integral element of a 64-byte vector.
// The 128-bit chunk that holds the element is extracted into vtmp, patched there,
// and then merged with src to produce dst.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    const XMMRegister chunk_xmm = $vtmp$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    BasicType lane_type = Matcher::vector_element_basic_type(this);
    int lanes_per_chunk = 16/type2aelembytes(lane_type); // elements per 16-byte chunk
    int chunk_shift = log2(lanes_per_chunk);

    assert(is_integral_type(lane_type), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Split the element index into a chunk number (0..3) and a slot inside that chunk.
    uint chunk_no = ($idx$$constant >> chunk_shift) & 3;
    uint slot_in_chunk = $idx$$constant & right_n_bits(chunk_shift);
    __ vextracti32x4(chunk_xmm, src_xmm, chunk_no);
    __ vinsert(lane_type, chunk_xmm, chunk_xmm, $val$$Register, slot_in_chunk);
    __ vinserti32x4($dst$$XMMRegister, src_xmm, chunk_xmm, chunk_no);
  %}
  ins_pipe( pipe_slow );
%}
19234
// Insert a long from a general register into a two-element vector.
// dst is both the input vector and the result.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const XMMRegister vec_xmm = $dst$$XMMRegister;
    const Register new_value = $val$$Register;
    __ pinsrq(vec_xmm, new_value, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19248
// Insert a long from a general register into a four-element (256-bit) vector.
// The 128-bit half that holds the element is extracted into vtmp, patched there,
// and then merged with src to produce dst.
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two longs per 128-bit half: bit 0 selects the slot, bit 1 selects the half.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19267
// Insert a long from a general register into an eight-element (512-bit) vector.
// The 128-bit chunk that holds the element is extracted into vtmp, patched there,
// and then merged with src to produce dst.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const XMMRegister chunk_xmm = $vtmp$$XMMRegister;
    const XMMRegister src_xmm = $src$$XMMRegister;
    // Two longs per 128-bit chunk: bit 0 selects the slot, bits 1..2 select the chunk.
    uint chunk_no = ($idx$$constant >> 1) & 3;
    uint slot_in_chunk = $idx$$constant & right_n_bits(1);
    __ vextracti32x4(chunk_xmm, src_xmm, chunk_no);
    __ vpinsrq(chunk_xmm, chunk_xmm, $val$$Register, slot_in_chunk);
    __ vinserti32x4($dst$$XMMRegister, src_xmm, chunk_xmm, chunk_no);
  %}
  ins_pipe( pipe_slow );
%}
19285
// Insert a float into a vector of fewer than eight elements.
// dst is both the input vector and the result.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Keep the low two index bits and move them to bits 4..5 of the insertps immediate.
    uint slot = $idx$$constant & right_n_bits(2);
    uint insertps_imm = slot << 4;
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, insertps_imm);
  %}
  ins_pipe( pipe_slow );
%}
19301
// Insert a float into a vector of eight or sixteen elements. The 128-bit chunk
// that holds the element is extracted into vtmp, patched there, and then merged
// with src to produce dst.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    // Four floats per 128-bit chunk: the low two index bits select the slot,
    // and the slot is placed in bits 4..5 of the vinsertps immediate.
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      // 256-bit vector: one index bit selects the half.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      // 512-bit vector: two index bits select one of four chunks.
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
19329
// Insert a double into a two-element vector. The value is first moved to the
// general-purpose TEMP tmp and inserted from there; dst is input and result.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const Register raw_bits = $tmp$$Register;
    __ movq(raw_bits, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, raw_bits, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19345
// Insert a double into a four-element (256-bit) vector. The value is moved to the
// general-purpose TEMP tmp; the 128-bit half that holds the element is extracted
// into vtmp, patched there, and then merged with src to produce dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two doubles per 128-bit half: bit 0 selects the slot, bit 1 selects the half.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19365
// Insert a double into an eight-element (512-bit) vector. The value is moved to the
// general-purpose TEMP tmp; the 128-bit chunk that holds the element is extracted
// into vtmp, patched there, and then merged with src to produce dst.
// NOTE(review): idx is declared immI here while the sibling insert rules use immU8 - confirm intent.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  // Both temporaries are listed so the printed form matches the declared effects.
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Two doubles per 128-bit chunk: bit 0 selects the slot, bits 1..2 select the chunk.
    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19384
19385 // ====================REDUCTION ARITHMETIC=======================================
19386
19387 // =======================Int Reduction==========================================
19388
// Reduction over a vector of ints: scalar src1 and vector src2 are handed to
// reduceI, which receives the matched ideal opcode to pick the operation.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand, not for the scalar result.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceI(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19409
19410 // =======================Long Reduction==========================================
19411
// Reduction over a vector of longs for CPUs without AVX512DQ; the vector
// operands are restricted to legVec. The work is delegated to reduceL.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand, not for the scalar result.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceL(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19432
// Reduction over a vector of longs for CPUs with AVX512DQ; the vector operands
// use the unrestricted vec class. The work is delegated to reduceL.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand, not for the scalar result.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceL(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19453
19454 // =======================Float Reduction==========================================
19455
// Strictly ordered float add/mul reduction for vectors of at most four elements.
// dst is both the scalar input and the result.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_lanes,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19469
// Strictly ordered float add/mul reduction for eight-element vectors.
// dst is both the scalar input and the result.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_lanes,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19483
// Strictly ordered float add/mul reduction for sixteen-element vectors; the
// vector operands are restricted to legVec. dst is both scalar input and result.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_lanes,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19497
19498
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Float add/mul reduction without strict ordering, two-element vectors.
  // Meant for the VectorAPI, which permits non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the emitter.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19515
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Float add/mul reduction without strict ordering, four-element vectors.
  // Meant for the VectorAPI, which permits non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the emitter.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19532
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Float add/mul reduction without strict ordering, eight-element vectors.
  // Meant for the VectorAPI, which permits non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the emitter.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19549
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Float add/mul reduction without strict ordering, sixteen-element vectors.
  // Meant for the VectorAPI, which permits non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the emitter.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19566
19567 // =======================Double Reduction==========================================
19568
// Strictly ordered double add/mul reduction for two-element vectors.
// dst is both the scalar input and the result.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_lanes,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19582
// Strictly ordered double add/mul reduction for four-element vectors.
// dst is both the scalar input and the result.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_lanes,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19596
// Strictly ordered double add/mul reduction for eight-element vectors; the
// vector operands are restricted to legVec. dst is both scalar input and result.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src);
    int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, num_lanes,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19610
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Double add/mul reduction without strict ordering, two-element vectors.
  // Meant for the VectorAPI, which permits non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the emitter.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19627
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Double add/mul reduction without strict ordering, four-element vectors.
  // Meant for the VectorAPI, which permits non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the emitter.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19644
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Double add/mul reduction without strict ordering, eight-element vectors.
  // Meant for the VectorAPI, which permits non-strictly ordered add/mul reduction.
  // src1 holds the reduction identity and is not passed to the emitter.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, num_lanes,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19661
19662 // =======================Byte Reduction==========================================
19663
// Reduction over a vector of bytes for CPUs without AVX512BW; the vector
// operands are restricted to legVec. The work is delegated to reduceB.
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand, not for the scalar result.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceB(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19683
// Reduction over a vector of bytes for CPUs with AVX512BW; the vector operands
// use the unrestricted vec class. The work is delegated to reduceB.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The element count is queried for the vector operand, not for the scalar result.
    int num_lanes = Matcher::vector_length(this, $src2);
    int ideal_op = this->ideal_Opcode();
    __ reduceB(ideal_op, num_lanes,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19703
19704 // =======================Short Reduction==========================================
19705
// Short-vector reduction.
// Folds the scalar $src1 together with the lanes of the short vector $src2 into
// $dst for the Add/Mul/And/Or/Xor/Min/Max/UMin/UMax reduction nodes listed
// below; code generation is delegated to the macro-assembler helper reduceS().
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // n->in(2) is the vector input (src2).
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst (OrReductionV   src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV src1 src2));
  match(Set dst (UMaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The helper receives the matched ideal opcode and the lane count of $src2.
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ reduceS(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register,
               $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19726
19727 // =======================Mul Reduction==========================================
19728
// Byte-vector multiply reduction for vectors of at most 32 lanes.
// $dst is declared TEMP in addition to being the result, and two vector temps
// are reserved; code generation is delegated to mulreduceB().
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  // n->in(2) is the vector input (src2).
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    __ mulreduceB(reduce_op, num_lanes,
                  $dst$$Register, $src1$$Register,
                  $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19742
// Byte-vector multiply reduction for vectors of exactly 64 lanes.
// Uses the same mulreduceB() helper as the <= 32-lane rule, but the vector
// operands are taken from the 'legVec' operand class.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // n->in(2) is the vector input (src2).
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    __ mulreduceB(ideal_op, lane_cnt,
                  $dst$$Register, $src1$$Register,
                  $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19756
19757 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Min/Max reduction of a 2-lane float vector when AVX10.2 is not available.
// Matches only when the scalar input $src1 is the constant +Inf (Min) or -Inf
// (Max); $src1 is not handed to the helper. Flags are killed.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    // NOTE(review): the 'false' argument presumably tells the helper that $dst
    // holds no incoming value - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(reduce_op, num_lanes, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19779
// Min/Max reduction of a float vector with 4 or more lanes when AVX10.2 is not
// available. Matches only when the scalar input $src1 is the constant +Inf
// (Min) or -Inf (Max); $src1 is not handed to the helper. Flags are killed.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    // NOTE(review): the 'false' argument presumably tells the helper that $dst
    // holds no incoming value - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19800
// Min/Max reduction of a 2-lane float vector when AVX10.2 is not available and
// the scalar input is an arbitrary value: $dst is both the scalar input and the
// result (see the match rules). Flags are killed.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    // NOTE(review): the 'true' argument presumably tells the helper that $dst
    // already holds the scalar input - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(reduce_op, num_lanes, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19819
19820
// Min/Max reduction of a float vector with 4 or more lanes when AVX10.2 is not
// available and the scalar input is an arbitrary value: $dst is both the scalar
// input and the result (see the match rules). Flags are killed.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    // NOTE(review): the 'true' argument presumably tells the helper that $dst
    // already holds the scalar input - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19839
// Min/Max reduction of a 2-lane float vector on CPUs with AVX10.2.
// Matches only when the scalar input $src1 is the constant +Inf (Min) or -Inf
// (Max); $src1 is not handed to the helper. Only one vector temp is needed and
// the flags register is not touched by this rule.
//
// The format mnemonic carries the "2F" tag so that the printed assembly names
// the lane count and element type, like the other 2-lane float min/max rules.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'false' presumably tells the helper that $dst holds no
    // incoming value - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19857
// Min/Max reduction of a float vector with 4 or more lanes on CPUs with
// AVX10.2. Matches only when the scalar input $src1 is the constant +Inf (Min)
// or -Inf (Max); $src1 is not handed to the helper. Two vector temps are
// reserved and the flags register is not touched by this rule.
//
// The format mnemonic carries the "F" tag so that the printed assembly names
// the element type, like the other multi-lane float min/max rules.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'false' presumably tells the helper that $dst holds no
    // incoming value - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
                         xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19875
// Min/Max reduction of a 2-lane float vector on CPUs with AVX10.2 when the
// scalar input is an arbitrary value: $dst is both the scalar input and the
// result (see the match rules). A single vector temp is reserved.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'true' presumably tells the helper that $dst already holds
    // the scalar input - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(reduce_op, num_lanes, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19891
// Min/Max reduction of a float vector with 4 or more lanes on CPUs with
// AVX10.2 when the scalar input is an arbitrary value: $dst is both the scalar
// input and the result (see the match rules). Two vector temps are reserved.
//
// The format mnemonic is "vector_minmaxF_reduction" (not the 2-lane "2F" name)
// because this rule only matches vectors with at least 4 lanes.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'true' presumably tells the helper that $dst already holds
    // the scalar input - confirm against reduceFloatMinMax().
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19907
//--------------------Min/Max Double Reduction --------------------
// Min/Max reduction of a 2-lane double vector when AVX10.2 is not available.
// Matches only when the scalar input $src1 is the constant +Inf (Min) or -Inf
// (Max); $src1 is not handed to the helper. Flags are killed.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    // NOTE(review): the 'false' argument presumably tells the helper that $dst
    // holds no incoming value - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(reduce_op, num_lanes, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19929
// Min/Max reduction of a double vector with 4 or more lanes when AVX10.2 is not
// available. Matches only when the scalar input $src1 is the constant +Inf
// (Min) or -Inf (Max); $src1 is not handed to the helper. Flags are killed.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    // NOTE(review): the 'false' argument presumably tells the helper that $dst
    // holds no incoming value - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(ideal_op, lane_cnt, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19950
19951
// Min/Max reduction of a 2-lane double vector when AVX10.2 is not available and
// the scalar input is an arbitrary value: $dst is both the scalar input and the
// result (see the match rules). Flags are killed.
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    // NOTE(review): the 'true' argument presumably tells the helper that $dst
    // already holds the scalar input - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(reduce_op, num_lanes, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19970
// Min/Max reduction of a double vector with 4 or more lanes when AVX10.2 is not
// available and the scalar input is an arbitrary value: $dst is both the scalar
// input and the result (see the match rules). Flags are killed.
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    // NOTE(review): the 'true' argument presumably tells the helper that $dst
    // already holds the scalar input - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(ideal_op, lane_cnt, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
19989
// Min/Max reduction of a 2-lane double vector on CPUs with AVX10.2.
// Matches only when the scalar input $src1 is the constant +Inf (Min) or -Inf
// (Max); $src1 is not handed to the helper. A single vector temp is reserved.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src2);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'false' presumably tells the helper that $dst holds no
    // incoming value - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(reduce_op, num_lanes, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20007
// Min/Max reduction of a double vector with 4 or more lanes on CPUs with
// AVX10.2. Matches only when the scalar input $src1 is the constant +Inf (Min)
// or -Inf (Max); $src1 is not handed to the helper. Two vector temps are reserved.
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src2);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'false' presumably tells the helper that $dst holds no
    // incoming value - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(ideal_op, lane_cnt, false,
                          $dst$$XMMRegister, $src2$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20025
20026
// Min/Max reduction of a 2-lane double vector on CPUs with AVX10.2 when the
// scalar input is an arbitrary value: $dst is both the scalar input and the
// result (see the match rules). A single vector temp is reserved.
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int reduce_op = this->ideal_Opcode();
    const int num_lanes = Matcher::vector_length(this, $src);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'true' presumably tells the helper that $dst already holds
    // the scalar input - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(reduce_op, num_lanes, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20042
// Min/Max reduction of a double vector with 4 or more lanes on CPUs with
// AVX10.2 when the scalar input is an arbitrary value: $dst is both the scalar
// input and the result (see the match rules). Two vector temps are reserved.
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() &&
            Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int lane_cnt = Matcher::vector_length(this, $src);
    // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
    // NOTE(review): 'true' presumably tells the helper that $dst already holds
    // the scalar input - confirm against reduceDoubleMinMax().
    __ reduceDoubleMinMax(ideal_op, lane_cnt, true,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20058
20059 // ====================VECTOR ARITHMETIC=======================================
20060
20061 // --------------------------------- ADD --------------------------------------
20062
20063 // Bytes vector add
// Packed byte add, selected when UseAVX == 0.
// Two-operand form: $dst is both the first addend and the result.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    XMMRegister acc    = $dst$$XMMRegister;  // first addend, overwritten with the sum
    XMMRegister addend = $src$$XMMRegister;
    __ paddb(acc, addend);
  %}
  ins_pipe(pipe_slow);
%}
20073
// Packed byte add, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 + $src2, with the vector length encoding
// derived from this node.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20084
// Packed byte add with the second operand loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20096
20097 // Shorts/Chars vector add
// Packed short/char add, selected when UseAVX == 0.
// Two-operand form: $dst is both the first addend and the result.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    XMMRegister acc    = $dst$$XMMRegister;  // first addend, overwritten with the sum
    XMMRegister addend = $src$$XMMRegister;
    __ paddw(acc, addend);
  %}
  ins_pipe(pipe_slow);
%}
20107
// Packed short/char add, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 + $src2, with the vector length encoding
// derived from this node.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20118
// Packed short/char add with the second operand loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20130
20131 // Integers vector add
// Packed int add, selected when UseAVX == 0.
// Two-operand form: $dst is both the first addend and the result.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    XMMRegister acc    = $dst$$XMMRegister;  // first addend, overwritten with the sum
    XMMRegister addend = $src$$XMMRegister;
    __ paddd(acc, addend);
  %}
  ins_pipe(pipe_slow);
%}
20141
// Packed int add, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 + $src2, with the vector length encoding
// derived from this node.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20152
20153
// Packed int add with the second operand loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20165
20166 // Longs vector add
// Packed long add, selected when UseAVX == 0.
// Two-operand form: $dst is both the first addend and the result.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    XMMRegister acc    = $dst$$XMMRegister;  // first addend, overwritten with the sum
    XMMRegister addend = $src$$XMMRegister;
    __ paddq(acc, addend);
  %}
  ins_pipe(pipe_slow);
%}
20176
// Packed long add, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 + $src2, with the vector length encoding
// derived from this node.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20187
// Packed long add with the second operand loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20199
20200 // Floats vector add
// Packed float add, selected when UseAVX == 0.
// Two-operand form: $dst is both the first addend and the result.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    XMMRegister acc    = $dst$$XMMRegister;  // first addend, overwritten with the sum
    XMMRegister addend = $src$$XMMRegister;
    __ addps(acc, addend);
  %}
  ins_pipe(pipe_slow);
%}
20210
// Packed float add, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 + $src2, with the vector length encoding
// derived from this node.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20221
// Packed float add with the second operand loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20233
20234 // Doubles vector add
// Packed double add, selected when UseAVX == 0.
// Two-operand form: $dst is both the first addend and the result.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    XMMRegister acc    = $dst$$XMMRegister;  // first addend, overwritten with the sum
    XMMRegister addend = $src$$XMMRegister;
    __ addpd(acc, addend);
  %}
  ins_pipe(pipe_slow);
%}
20244
// Packed double add, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 + $src2, with the vector length encoding
// derived from this node.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20255
// Packed double add with the second operand loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20267
20268 // --------------------------------- SUB --------------------------------------
20269
20270 // Bytes vector sub
// Packed byte subtract, selected when UseAVX == 0.
// Two-operand form: $dst is both the minuend and the result ($dst = $dst - $src).
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    XMMRegister minuend    = $dst$$XMMRegister;  // overwritten with the difference
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubb(minuend, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20280
// Packed byte subtract, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 - $src2, with the vector length encoding
// derived from this node.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20291
// Packed byte subtract with the subtrahend loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20303
20304 // Shorts/Chars vector sub
// Packed short/char subtract, selected when UseAVX == 0.
// Two-operand form: $dst is both the minuend and the result ($dst = $dst - $src).
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    XMMRegister minuend    = $dst$$XMMRegister;  // overwritten with the difference
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubw(minuend, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20314
20315
// Packed short/char subtract, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 - $src2, with the vector length encoding
// derived from this node.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20326
// Packed short/char subtract with the subtrahend loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20338
20339 // Integers vector sub
// Packed int subtract, selected when UseAVX == 0.
// Two-operand form: $dst is both the minuend and the result ($dst = $dst - $src).
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    XMMRegister minuend    = $dst$$XMMRegister;  // overwritten with the difference
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubd(minuend, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20349
// Packed int subtract, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 - $src2, with the vector length encoding
// derived from this node.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20360
// Packed int subtract with the subtrahend loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20372
20373 // Longs vector sub
// Packed long subtract, selected when UseAVX == 0.
// Two-operand form: $dst is both the minuend and the result ($dst = $dst - $src).
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    XMMRegister minuend    = $dst$$XMMRegister;  // overwritten with the difference
    XMMRegister subtrahend = $src$$XMMRegister;
    __ psubq(minuend, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20383
// Packed long subtract, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 - $src2, with the vector length encoding
// derived from this node.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20394
20395
// Packed long subtract with the subtrahend loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20407
20408 // Floats vector sub
// Packed float subtract, selected when UseAVX == 0.
// Two-operand form: $dst is both the minuend and the result ($dst = $dst - $src).
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    XMMRegister minuend    = $dst$$XMMRegister;  // overwritten with the difference
    XMMRegister subtrahend = $src$$XMMRegister;
    __ subps(minuend, subtrahend);
  %}
  ins_pipe(pipe_slow);
%}
20418
// Packed float subtract, selected when UseAVX > 0.
// Three-operand form: $dst = $src1 - $src2, with the vector length encoding
// derived from this node.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister,
              $src1$$XMMRegister, $src2$$XMMRegister,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20429
// Packed float subtract with the subtrahend loaded straight from memory.
// Selected when UseAVX > 0 and the register operand n->in(1) is wider than
// 8 bytes; the LoadVector is folded into the instruction's memory operand.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister,
              $src$$XMMRegister, $mem$$Address,
              vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
20441
20442 // Doubles vector sub
// Double vector subtract for the non-AVX case (UseAVX == 0).
// The match rule uses dst as the first input, so subpd updates dst in place
// (dst = dst - src).
20443 instruct vsubD(vec dst, vec src) %{
20444 predicate(UseAVX == 0);
20445 match(Set dst (SubVD dst src));
20446 format %{ "subpd $dst,$src\t! sub packedD" %}
20447 ins_encode %{
20448 __ subpd($dst$$XMMRegister, $src$$XMMRegister);
20449 %}
20450 ins_pipe( pipe_slow );
20451 %}
20452
// Double vector subtract, register-register form (dst = src1 - src2).
// Selected when UseAVX > 0; emits vsubpd with the vector length encoding
// computed for this node.
20453 instruct vsubD_reg(vec dst, vec src1, vec src2) %{
20454 predicate(UseAVX > 0);
20455 match(Set dst (SubVD src1 src2));
20456 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
20457 ins_encode %{
20458 int vlen_enc = vector_length_encoding(this);
20459 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20460 %}
20461 ins_pipe( pipe_slow );
20462 %}
20463
// Double vector subtract with the second operand folded from memory.
// Matches SubVD whose second input is a LoadVector; requires UseAVX > 0 and
// that the vector at n->in(1) is wider than 8 bytes.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
20464 instruct vsubD_mem(vec dst, vec src, memory mem) %{
20465 predicate((UseAVX > 0) &&
20466 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20467 match(Set dst (SubVD src (LoadVector mem)));
20468 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
20469 ins_encode %{
20470 int vlen_enc = vector_length_encoding(this);
20471 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20472 %}
20473 ins_pipe( pipe_slow );
20474 %}
20475
20476 // --------------------------------- MUL --------------------------------------
20477
20478 // Byte vector mul
// Byte vector multiply for vectors of at most 8 bytes.
// There is no packed byte multiply used here; the bytes are widened to words,
// multiplied with pmullw, and narrowed again. dst and xtmp are both TEMP, so
// they are written before all inputs have been consumed.
20479 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
20480 predicate(Matcher::vector_length_in_bytes(n) <= 8);
20481 match(Set dst (MulVB src1 src2));
20482 effect(TEMP dst, TEMP xtmp);
20483 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20484 ins_encode %{
20485 assert(UseSSE > 3, "required");
// Widen both byte inputs to 16-bit words.
20486 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
20487 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
20488 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
// Shift left then right by 8 to zero the upper byte of every word, leaving
// only the low 8 bits of each product before packing back to bytes.
20489 __ psllw($dst$$XMMRegister, 8);
20490 __ psrlw($dst$$XMMRegister, 8);
20491 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20492 %}
20493 ins_pipe( pipe_slow );
20494 %}
20495
// Byte vector multiply for vectors wider than 8 bytes when UseAVX == 0.
// Each 16-bit word holds two bytes. The odd-index (upper) bytes and the
// even-index (lower) bytes are multiplied separately with pmullw and the two
// partial results are OR-ed together. dst and xtmp are both TEMP.
20496 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
20497 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
20498 match(Set dst (MulVB src1 src2));
20499 effect(TEMP dst, TEMP xtmp);
20500 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20501 ins_encode %{
20502 assert(UseSSE > 3, "required");
20503 // Odd-index elements: move the upper byte of each word down, multiply,
// then shift the product back into the upper byte position.
20504 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
20505 __ psrlw($dst$$XMMRegister, 8);
20506 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
20507 __ psrlw($xtmp$$XMMRegister, 8);
20508 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
20509 __ psllw($dst$$XMMRegister, 8);
20510 // Even-index elements: multiply the words as they are and keep only the
// low byte of each product (shift left then right by 8).
20511 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20512 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
20513 __ psllw($xtmp$$XMMRegister, 8);
20514 __ psrlw($xtmp$$XMMRegister, 8);
20515 // Combine
20516 __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
20517 %}
20518 ins_pipe( pipe_slow );
20519 %}
20520
// Byte vector multiply for vectors wider than 8 bytes when UseAVX > 0.
// Same odd/even split as the non-AVX rule: the upper and lower byte of every
// 16-bit word are multiplied separately with vpmullw and merged with vpor.
// Only xtmp1 and xtmp2 are TEMP; dst is written by the final vpor only.
20521 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20522 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
20523 match(Set dst (MulVB src1 src2));
20524 effect(TEMP xtmp1, TEMP xtmp2);
20525 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20526 ins_encode %{
20527 int vlen_enc = vector_length_encoding(this);
20528 // Odd-index elements: bring the upper bytes down, multiply, shift the
// product back up into the upper byte of each word (result in xtmp2).
20529 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
20530 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
20531 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20532 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
20533 // Even-index elements: multiply whole words and keep the low byte of each
// product (result in xtmp1).
20534 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20535 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20536 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
20537 // Combine
20538 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20539 %}
20540 ins_pipe( pipe_slow );
20541 %}
20542
20543 // Shorts/Chars vector mul
// Short/char vector multiply for the non-AVX case (UseAVX == 0).
// The match rule uses dst as the first input, so pmullw updates dst in place.
20544 instruct vmulS(vec dst, vec src) %{
20545 predicate(UseAVX == 0);
20546 match(Set dst (MulVS dst src));
20547 format %{ "pmullw $dst,$src\t! mul packedS" %}
20548 ins_encode %{
20549 __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
20550 %}
20551 ins_pipe( pipe_slow );
20552 %}
20553
// Short/char vector multiply, register-register form (dst = src1 * src2).
// Selected when UseAVX > 0; emits vpmullw with the vector length encoding
// computed for this node.
20554 instruct vmulS_reg(vec dst, vec src1, vec src2) %{
20555 predicate(UseAVX > 0);
20556 match(Set dst (MulVS src1 src2));
20557 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
20558 ins_encode %{
20559 int vlen_enc = vector_length_encoding(this);
20560 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20561 %}
20562 ins_pipe( pipe_slow );
20563 %}
20564
// Short/char vector multiply with the second operand folded from memory.
// Matches MulVS whose second input is a LoadVector; requires UseAVX > 0 and
// that the vector at n->in(1) is wider than 8 bytes.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
20565 instruct vmulS_mem(vec dst, vec src, memory mem) %{
20566 predicate((UseAVX > 0) &&
20567 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20568 match(Set dst (MulVS src (LoadVector mem)));
20569 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
20570 ins_encode %{
20571 int vlen_enc = vector_length_encoding(this);
20572 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20573 %}
20574 ins_pipe( pipe_slow );
20575 %}
20576
20577 // Integers vector mul
// Int vector multiply for the non-AVX case (UseAVX == 0).
// The match rule uses dst as the first input, so pmulld updates dst in place.
// The encoding asserts UseSSE > 3.
20578 instruct vmulI(vec dst, vec src) %{
20579 predicate(UseAVX == 0);
20580 match(Set dst (MulVI dst src));
20581 format %{ "pmulld $dst,$src\t! mul packedI" %}
20582 ins_encode %{
20583 assert(UseSSE > 3, "required");
20584 __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
20585 %}
20586 ins_pipe( pipe_slow );
20587 %}
20588
// Int vector multiply, register-register form (dst = src1 * src2).
// Selected when UseAVX > 0; emits vpmulld with the vector length encoding
// computed for this node.
20589 instruct vmulI_reg(vec dst, vec src1, vec src2) %{
20590 predicate(UseAVX > 0);
20591 match(Set dst (MulVI src1 src2));
20592 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
20593 ins_encode %{
20594 int vlen_enc = vector_length_encoding(this);
20595 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20596 %}
20597 ins_pipe( pipe_slow );
20598 %}
20599
// Int vector multiply with the second operand folded from memory.
// Matches MulVI whose second input is a LoadVector; requires UseAVX > 0 and
// that the vector at n->in(1) is wider than 8 bytes.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
20600 instruct vmulI_mem(vec dst, vec src, memory mem) %{
20601 predicate((UseAVX > 0) &&
20602 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20603 match(Set dst (MulVI src (LoadVector mem)));
20604 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
20605 ins_encode %{
20606 int vlen_enc = vector_length_encoding(this);
20607 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20608 %}
20609 ins_pipe( pipe_slow );
20610 %}
20611
20612 // Longs vector mul
// Long vector multiply using a single evpmullq, register-register form.
// Selected for 64-byte vectors when avx512dq is supported, or for any vector
// length when avx512vldq is supported. Carries ins_cost(500).
20613 instruct evmulL_reg(vec dst, vec src1, vec src2) %{
20614 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20615 VM_Version::supports_avx512dq()) ||
20616 VM_Version::supports_avx512vldq());
20617 match(Set dst (MulVL src1 src2));
20618 ins_cost(500);
20619 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
20620 ins_encode %{
20621 assert(UseAVX > 2, "required");
20622 int vlen_enc = vector_length_encoding(this);
20623 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20624 %}
20625 ins_pipe( pipe_slow );
20626 %}
20627
// Long vector multiply using evpmullq with the second operand folded from
// memory (LoadVector). Selected for 64-byte vectors when avx512dq is
// supported, or for vectors wider than 8 bytes when avx512vldq is supported.
// Carries ins_cost(500).
20628 instruct evmulL_mem(vec dst, vec src, memory mem) %{
20629 predicate((Matcher::vector_length_in_bytes(n) == 64 &&
20630 VM_Version::supports_avx512dq()) ||
20631 (Matcher::vector_length_in_bytes(n) > 8 &&
20632 VM_Version::supports_avx512vldq()));
20633 match(Set dst (MulVL src (LoadVector mem)));
20634 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
20635 ins_cost(500);
20636 ins_encode %{
20637 assert(UseAVX > 2, "required");
20638 int vlen_enc = vector_length_encoding(this);
20639 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20640 %}
20641 ins_pipe( pipe_slow );
20642 %}
20643
// Long vector multiply for the non-AVX case (UseAVX == 0), built from 32-bit
// multiplies. With a = src1 and b = src2 split into 32-bit halves, the low
// 64 bits of a*b are lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32).
// dst and xtmp are both TEMP. Carries ins_cost(500).
20644 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20645 predicate(UseAVX == 0);
20646 match(Set dst (MulVL src1 src2));
20647 ins_cost(500);
20648 effect(TEMP dst, TEMP xtmp);
20649 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20650 ins_encode %{
20651 assert(VM_Version::supports_sse4_1(), "required");
20652 // Compute the lo-hi cross products; only their lower 32 bits matter.
// pshufd with 0xB1 swaps the two 32-bit halves of every 64-bit lane.
20653 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20654 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
20655 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20656 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
20657 __ psllq($dst$$XMMRegister, 32);
20658 // Compute the lo-lo products and add them to the shifted cross terms.
20659 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20660 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
20661 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20662 %}
20663 ins_pipe( pipe_slow );
20664 %}
20665
// Long vector multiply for UseAVX > 0 when evpmullq is not available:
// 64-byte vectors without avx512dq, or shorter vectors without avx512vldq.
// Built from 32-bit multiplies: with a = src1 and b = src2 split into 32-bit
// halves, the low 64 bits of a*b are
// lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32).
// Only xtmp1 and xtmp2 are TEMP; dst is written by the final vpaddq only.
// Carries ins_cost(500).
20666 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20667 predicate(UseAVX > 0 &&
20668 ((Matcher::vector_length_in_bytes(n) == 64 &&
20669 !VM_Version::supports_avx512dq()) ||
20670 (Matcher::vector_length_in_bytes(n) < 64 &&
20671 !VM_Version::supports_avx512vldq())));
20672 match(Set dst (MulVL src1 src2));
20673 effect(TEMP xtmp1, TEMP xtmp2);
20674 ins_cost(500);
20675 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20676 ins_encode %{
20677 int vlen_enc = vector_length_encoding(this);
20678 // Compute the lo-hi cross products; only their lower 32 bits matter.
// vpshufd with 0xB1 swaps the two 32-bit halves of every 64-bit lane.
20679 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20680 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20681 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20682 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20683 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20684 // Compute the lo-lo products and add them to the shifted cross terms.
20685 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20686 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20687 %}
20688 ins_pipe( pipe_slow );
20689 %}
20690
// Long vector multiply specialised for MulVL nodes that report
// has_uint_inputs(): a single vpmuludq is emitted. Requires UseAVX > 0.
// ins_cost(100) is lower than the 500 used by the other MulVL rules in this
// file, so this rule is cheaper whenever its predicate holds.
20691 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20692 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20693 match(Set dst (MulVL src1 src2));
20694 ins_cost(100);
20695 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20696 ins_encode %{
20697 int vlen_enc = vector_length_encoding(this);
20698 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20699 %}
20700 ins_pipe( pipe_slow );
20701 %}
20702
// Long vector multiply specialised for MulVL nodes that report
// has_int_inputs(): a single vpmuldq is emitted. Requires UseAVX > 0.
// ins_cost(100) is lower than the 500 used by the other MulVL rules in this
// file, so this rule is cheaper whenever its predicate holds.
20703 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20704 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20705 match(Set dst (MulVL src1 src2));
20706 ins_cost(100);
20707 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20708 ins_encode %{
20709 int vlen_enc = vector_length_encoding(this);
20710 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20711 %}
20712 ins_pipe( pipe_slow );
20713 %}
20714
20715 // Floats vector mul
// Float vector multiply for the non-AVX case (UseAVX == 0).
// The match rule uses dst as the first input, so mulps updates dst in place.
20716 instruct vmulF(vec dst, vec src) %{
20717 predicate(UseAVX == 0);
20718 match(Set dst (MulVF dst src));
20719 format %{ "mulps $dst,$src\t! mul packedF" %}
20720 ins_encode %{
20721 __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20722 %}
20723 ins_pipe( pipe_slow );
20724 %}
20725
// Float vector multiply, register-register form (dst = src1 * src2).
// Selected when UseAVX > 0; emits vmulps with the vector length encoding
// computed for this node.
20726 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20727 predicate(UseAVX > 0);
20728 match(Set dst (MulVF src1 src2));
20729 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
20730 ins_encode %{
20731 int vlen_enc = vector_length_encoding(this);
20732 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20733 %}
20734 ins_pipe( pipe_slow );
20735 %}
20736
// Float vector multiply with the second operand folded from memory.
// Matches MulVF whose second input is a LoadVector; requires UseAVX > 0 and
// that the vector at n->in(1) is wider than 8 bytes.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
20737 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20738 predicate((UseAVX > 0) &&
20739 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20740 match(Set dst (MulVF src (LoadVector mem)));
20741 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
20742 ins_encode %{
20743 int vlen_enc = vector_length_encoding(this);
20744 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20745 %}
20746 ins_pipe( pipe_slow );
20747 %}
20748
20749 // Doubles vector mul
// Double vector multiply for the non-AVX case (UseAVX == 0).
// The match rule uses dst as the first input, so mulpd updates dst in place.
20750 instruct vmulD(vec dst, vec src) %{
20751 predicate(UseAVX == 0);
20752 match(Set dst (MulVD dst src));
20753 format %{ "mulpd $dst,$src\t! mul packedD" %}
20754 ins_encode %{
20755 __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20756 %}
20757 ins_pipe( pipe_slow );
20758 %}
20759
// Double vector multiply, register-register form (dst = src1 * src2).
// Selected when UseAVX > 0; emits vmulpd with the vector length encoding
// computed for this node.
20760 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20761 predicate(UseAVX > 0);
20762 match(Set dst (MulVD src1 src2));
20763 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
20764 ins_encode %{
20765 int vlen_enc = vector_length_encoding(this);
20766 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20767 %}
20768 ins_pipe( pipe_slow );
20769 %}
20770
// Double vector multiply with the second operand folded from memory.
// Matches MulVD whose second input is a LoadVector; requires UseAVX > 0 and
// that the vector at n->in(1) is wider than 8 bytes.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
20771 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20772 predicate((UseAVX > 0) &&
20773 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20774 match(Set dst (MulVD src (LoadVector mem)));
20775 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
20776 ins_encode %{
20777 int vlen_enc = vector_length_encoding(this);
20778 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20779 %}
20780 ins_pipe( pipe_slow );
20781 %}
20782
20783 // --------------------------------- DIV --------------------------------------
20784
20785 // Floats vector div
// Float vector divide for the non-AVX case (UseAVX == 0).
// The match rule uses dst as the first input, so divps updates dst in place
// (dst = dst / src).
20786 instruct vdivF(vec dst, vec src) %{
20787 predicate(UseAVX == 0);
20788 match(Set dst (DivVF dst src));
20789 format %{ "divps $dst,$src\t! div packedF" %}
20790 ins_encode %{
20791 __ divps($dst$$XMMRegister, $src$$XMMRegister);
20792 %}
20793 ins_pipe( pipe_slow );
20794 %}
20795
// Float vector divide, register-register form (dst = src1 / src2).
// Selected when UseAVX > 0; emits vdivps with the vector length encoding
// computed for this node.
20796 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20797 predicate(UseAVX > 0);
20798 match(Set dst (DivVF src1 src2));
20799 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
20800 ins_encode %{
20801 int vlen_enc = vector_length_encoding(this);
20802 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20803 %}
20804 ins_pipe( pipe_slow );
20805 %}
20806
// Float vector divide with the divisor folded from memory.
// Matches DivVF whose second input is a LoadVector; requires UseAVX > 0 and
// that the vector at n->in(1) is wider than 8 bytes.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
20807 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20808 predicate((UseAVX > 0) &&
20809 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20810 match(Set dst (DivVF src (LoadVector mem)));
20811 format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
20812 ins_encode %{
20813 int vlen_enc = vector_length_encoding(this);
20814 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20815 %}
20816 ins_pipe( pipe_slow );
20817 %}
20818
20819 // Doubles vector div
// Double vector divide for the non-AVX case (UseAVX == 0).
// The match rule uses dst as the first input, so divpd updates dst in place
// (dst = dst / src).
20820 instruct vdivD(vec dst, vec src) %{
20821 predicate(UseAVX == 0);
20822 match(Set dst (DivVD dst src));
20823 format %{ "divpd $dst,$src\t! div packedD" %}
20824 ins_encode %{
20825 __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20826 %}
20827 ins_pipe( pipe_slow );
20828 %}
20829
// Double vector divide, register-register form (dst = src1 / src2).
// Selected when UseAVX > 0; emits vdivpd with the vector length encoding
// computed for this node.
20830 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20831 predicate(UseAVX > 0);
20832 match(Set dst (DivVD src1 src2));
20833 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
20834 ins_encode %{
20835 int vlen_enc = vector_length_encoding(this);
20836 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20837 %}
20838 ins_pipe( pipe_slow );
20839 %}
20840
// Double vector divide with the divisor folded from memory.
// Matches DivVD whose second input is a LoadVector; requires UseAVX > 0 and
// that the vector at n->in(1) is wider than 8 bytes.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
20841 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20842 predicate((UseAVX > 0) &&
20843 (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20844 match(Set dst (DivVD src (LoadVector mem)));
20845 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
20846 ins_encode %{
20847 int vlen_enc = vector_length_encoding(this);
20848 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
20849 %}
20850 ins_pipe( pipe_slow );
20851 %}
20852
20853 // ------------------------------ MinMax ---------------------------------------
20854
20855 // Byte, Short, Int vector Min/Max
// Signed Min/Max for integral vectors other than long when UseAVX == 0.
// One rule covers both MinV and MaxV: the ideal opcode and the element type
// are passed to pminmax. dst is also the first input, so it is updated in
// place. The encoding asserts UseSSE >= 4.
20856 instruct minmax_reg_sse(vec dst, vec src) %{
20857 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20858 UseAVX == 0);
20859 match(Set dst (MinV dst src));
20860 match(Set dst (MaxV dst src));
20861 format %{ "vector_minmax $dst,$src\t! " %}
20862 ins_encode %{
20863 assert(UseSSE >= 4, "required");
20864
20865 int opcode = this->ideal_Opcode();
20866 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20867 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
20868 %}
20869 ins_pipe( pipe_slow );
20870 %}
20871
// Signed Min/Max for integral vectors other than long when UseAVX > 0.
// One rule covers both MinV and MaxV: the ideal opcode, the element type and
// the vector length encoding are passed to vpminmax.
20872 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20873 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20874 UseAVX > 0);
20875 match(Set dst (MinV src1 src2));
20876 match(Set dst (MaxV src1 src2));
20877 format %{ "vector_minmax $dst,$src1,$src2\t! " %}
20878 ins_encode %{
20879 int opcode = this->ideal_Opcode();
20880 int vlen_enc = vector_length_encoding(this);
20881 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20882
20883 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20884 %}
20885 ins_pipe( pipe_slow );
20886 %}
20887
20888 // Long vector Min/Max
// Signed Min/Max for 16-byte long vectors when UseAVX == 0.
// The temporary is declared with the rxmm0 operand class and is handed to
// pminmax as an extra scratch register; dst and tmp are both TEMP.
// Note that the MinV rule lists its inputs as (dst src) while the MaxV rule
// lists them as (src dst).
// The encoding asserts UseSSE >= 4 and that the element type is T_LONG.
20889 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20890 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20891 UseAVX == 0);
20892 match(Set dst (MinV dst src));
20893 match(Set dst (MaxV src dst));
20894 effect(TEMP dst, TEMP tmp);
20895 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
20896 ins_encode %{
20897 assert(UseSSE >= 4, "required");
20898
20899 int opcode = this->ideal_Opcode();
20900 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20901 assert(elem_bt == T_LONG, "sanity");
20902
20903 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20904 %}
20905 ins_pipe( pipe_slow );
20906 %}
20907
// Signed Min/Max for long vectors of at most 32 bytes when UseAVX > 0 and
// avx512vl is not supported. All operands use the legVec operand class and
// dst is additionally declared TEMP. The opcode, element type and vector
// length encoding are passed to vpminmax.
20908 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20909 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20910 UseAVX > 0 && !VM_Version::supports_avx512vl());
20911 match(Set dst (MinV src1 src2));
20912 match(Set dst (MaxV src1 src2));
20913 effect(TEMP dst);
20914 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20915 ins_encode %{
20916 int vlen_enc = vector_length_encoding(this);
20917 int opcode = this->ideal_Opcode();
20918 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20919 assert(elem_bt == T_LONG, "sanity");
20920
20921 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20922 %}
20923 ins_pipe( pipe_slow );
20924 %}
20925
// Signed Min/Max for long vectors on the EVEX path: selected for 64-byte
// vectors, or for any length when avx512vl is supported. One rule covers both
// MinV and MaxV; the opcode, element type and vector length encoding are
// passed to vpminmax. The encoding asserts UseAVX > 2 and element type T_LONG.
20926 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20927 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20928 Matcher::vector_element_basic_type(n) == T_LONG);
20929 match(Set dst (MinV src1 src2));
20930 match(Set dst (MaxV src1 src2));
// The second source needs its '$' so the operand is substituted in the
// printed instruction instead of the literal text "src2".
20931 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
20932 ins_encode %{
20933 assert(UseAVX > 2, "required");
20934
20935 int vlen_enc = vector_length_encoding(this);
20936 int opcode = this->ideal_Opcode();
20937 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20938 assert(elem_bt == T_LONG, "sanity");
20939
20940 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20941 %}
20942 ins_pipe( pipe_slow );
20943 %}
20944
20945 // Float/Double vector Min/Max
// Float/double vector Min/Max when AVX10.2 is supported.
// One rule covers both MinV and MaxV; no temporaries are declared. The
// opcode, element type and vector length encoding are passed to
// vminmax_fp_avx10_2.
// NOTE(review): k0 is passed as the mask argument, presumably meaning
// "unmasked" - confirm against vminmax_fp_avx10_2.
20946 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20947 predicate(VM_Version::supports_avx10_2() &&
20948 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20949 match(Set dst (MinV a b));
20950 match(Set dst (MaxV a b));
20951 format %{ "vector_minmaxFP $dst, $a, $b" %}
20952 ins_encode %{
20953 int vlen_enc = vector_length_encoding(this);
20954 int opcode = this->ideal_Opcode();
20955 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20956 __ vminmax_fp_avx10_2(opcode, elem_bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
20957 %}
20958 ins_pipe( pipe_slow );
20959 %}
20960
20961 // Float/Double vector Min/Max
// Float/double vector Min/Max for vectors of at most 32 bytes when
// UseAVX > 0 and AVX10.2 is not supported. All operands use the legVec
// operand class. Three vector temporaries (tmp, atmp, btmp) are handed to
// vminmax_fp together with the opcode, element type and vector length
// encoding. a and b are declared USE and are not modified by this rule's
// effect list.
20962 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20963 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20964 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20965 UseAVX > 0);
20966 match(Set dst (MinV a b));
20967 match(Set dst (MaxV a b));
20968 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20969 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20970 ins_encode %{
20971 assert(UseAVX > 0, "required");
20972
20973 int opcode = this->ideal_Opcode();
20974 int vlen_enc = vector_length_encoding(this);
20975 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20976
20977 __ vminmax_fp(opcode, elem_bt,
20978 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20979 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20980 %}
20981 ins_pipe( pipe_slow );
20982 %}
20983
// Float/double vector Min/Max for 64-byte vectors when AVX10.2 is not
// supported. Two vector temporaries (atmp, btmp) and one mask-register
// temporary (ktmp) are handed to evminmax_fp together with the opcode,
// element type and vector length encoding. dst is TEMP as well; a and b are
// declared USE. The encoding asserts UseAVX > 2.
20984 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20985 predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20986 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20987 match(Set dst (MinV a b));
20988 match(Set dst (MaxV a b));
20989 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
// List every declared temporary, including the mask register, so the printed
// instruction shows all registers this rule clobbers.
20990 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
20991 ins_encode %{
20992 assert(UseAVX > 2, "required");
20993
20994 int opcode = this->ideal_Opcode();
20995 int vlen_enc = vector_length_encoding(this);
20996 BasicType elem_bt = Matcher::vector_element_basic_type(this);
20997
20998 __ evminmax_fp(opcode, elem_bt,
20999 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
21000 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
21001 %}
21002 ins_pipe( pipe_slow );
21003 %}
21004
21005 // ------------------------------ Unsigned vector Min/Max ----------------------
21006
// Unsigned Min/Max, register-register form.
// Selected for every element type when avx512vl is supported, and for all
// element types except long otherwise. One rule covers both UMinV and UMaxV;
// the opcode, element type and vector length encoding are passed to
// vpuminmax. The encoding asserts the element type is integral.
21007 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
21008 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21009 match(Set dst (UMinV a b));
21010 match(Set dst (UMaxV a b));
21011 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21012 ins_encode %{
21013 int opcode = this->ideal_Opcode();
21014 int vlen_enc = vector_length_encoding(this);
21015 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21016 assert(is_integral_type(elem_bt), "");
21017 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
21018 %}
21019 ins_pipe( pipe_slow );
21020 %}
21021
// Unsigned Min/Max with the second operand folded from memory (LoadVector).
// Same predicate as the register form: every element type when avx512vl is
// supported, otherwise all element types except long. The address of b is
// passed to vpuminmax in place of a register.
21022 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
21023 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
21024 match(Set dst (UMinV a (LoadVector b)));
21025 match(Set dst (UMaxV a (LoadVector b)));
21026 format %{ "vector_uminmax $dst,$a,$b\t!" %}
21027 ins_encode %{
21028 int opcode = this->ideal_Opcode();
21029 int vlen_enc = vector_length_encoding(this);
21030 BasicType elem_bt = Matcher::vector_element_basic_type(this);
21031 assert(is_integral_type(elem_bt), "");
21032 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
21033 %}
21034 ins_pipe( pipe_slow );
21035 %}
21036
// Unsigned Min/Max for long vectors when avx512vl is not supported.
// One rule covers both UMinV and UMaxV; the opcode, the vector length
// encoding and two vector temporaries are passed to vpuminmaxq.
21037 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
21038 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
21039 match(Set dst (UMinV a b));
21040 match(Set dst (UMaxV a b));
21041 effect(TEMP xtmp1, TEMP xtmp2);
// The temporaries need their '$' so the allocated registers are printed
// instead of the literal operand names.
21042 format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
21043 ins_encode %{
21044 int opcode = this->ideal_Opcode();
21045 int vlen_enc = vector_length_encoding(this);
21046 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
21047 %}
21048 ins_pipe( pipe_slow );
21049 %}
21050
// Masked unsigned Min/Max, register form. Matches UMinV/UMaxV whose inputs
// are (Binary dst src2) plus a kReg mask; dst is both the first source and
// the destination. The opcode, element type, mask and vector length encoding
// are passed to evmasked_op. The rule has no predicate of its own.
// NOTE(review): the literal `true` argument is presumably the merge-masking
// flag - confirm against evmasked_op.
21051 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
21052 match(Set dst (UMinV (Binary dst src2) mask));
21053 match(Set dst (UMaxV (Binary dst src2) mask));
21054 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21055 ins_encode %{
21056 int vlen_enc = vector_length_encoding(this);
21057 BasicType bt = Matcher::vector_element_basic_type(this);
21058 int opc = this->ideal_Opcode();
21059 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21060 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
21061 %}
21062 ins_pipe( pipe_slow );
21063 %}
21064
// Masked unsigned Min/Max with the second source folded from memory.
// Matches UMinV/UMaxV whose inputs are (Binary dst (LoadVector src2)) plus a
// kReg mask; dst is both the first source and the destination. The address
// of src2 is passed to evmasked_op in place of a register. The rule has no
// predicate of its own.
// NOTE(review): the literal `true` argument is presumably the merge-masking
// flag - confirm against evmasked_op.
21065 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
21066 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
21067 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
21068 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
21069 ins_encode %{
21070 int vlen_enc = vector_length_encoding(this);
21071 BasicType bt = Matcher::vector_element_basic_type(this);
21072 int opc = this->ideal_Opcode();
21073 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
21074 $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
21075 %}
21076 ins_pipe( pipe_slow );
21077 %}
21078
21079 // --------------------------------- Signum/CopySign ---------------------------
21080
// Scalar float signum. dst is both the input and the result; the constants
// zero and one arrive as register operands through the Binary node. The
// ideal opcode is passed to signum_fp. The flags register is declared KILL.
21081 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
21082 match(Set dst (SignumF dst (Binary zero one)));
21083 effect(KILL cr);
21084 format %{ "signumF $dst, $dst" %}
21085 ins_encode %{
21086 int opcode = this->ideal_Opcode();
21087 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21088 %}
21089 ins_pipe( pipe_slow );
21090 %}
21091
// Scalar double signum. dst is both the input and the result; the constants
// zero and one arrive as register operands through the Binary node. The
// ideal opcode is passed to signum_fp. The flags register is declared KILL.
21092 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
21093 match(Set dst (SignumD dst (Binary zero one)));
21094 effect(KILL cr);
21095 format %{ "signumD $dst, $dst" %}
21096 ins_encode %{
21097 int opcode = this->ideal_Opcode();
21098 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
21099 %}
21100 ins_pipe( pipe_slow );
21101 %}
21102
// Vector signum (float and double) for vectors of at most 32 bytes when
// avx512vl is not supported. One rule covers SignumVF and SignumVD; the ideal
// opcode is passed to vector_signum_avx together with one vector temporary.
// dst and xtmp1 are both TEMP.
21103 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
21104 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
21105 match(Set dst (SignumVF src (Binary zero one)));
21106 match(Set dst (SignumVD src (Binary zero one)));
21107 effect(TEMP dst, TEMP xtmp1);
21108 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
21109 ins_encode %{
21110 int opcode = this->ideal_Opcode();
21111 int vec_enc = vector_length_encoding(this);
21112 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21113 $xtmp1$$XMMRegister, vec_enc);
21114 %}
21115 ins_pipe( pipe_slow );
21116 %}
21117
// Vector signum (float and double) on the EVEX path: selected when avx512vl
// is supported, or for 64-byte vectors. One rule covers SignumVF and
// SignumVD; the ideal opcode is passed to vector_signum_evex together with a
// mask-register temporary. dst and ktmp1 are both TEMP.
21118 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
21119 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
21120 match(Set dst (SignumVF src (Binary zero one)));
21121 match(Set dst (SignumVD src (Binary zero one)));
21122 effect(TEMP dst, TEMP ktmp1);
21123 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
21124 ins_encode %{
21125 int opcode = this->ideal_Opcode();
21126 int vec_enc = vector_length_encoding(this);
21127 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
21128 $ktmp1$$KRegister, vec_enc);
21129 %}
21130 ins_pipe( pipe_slow );
21131 %}
21132
21133 // ---------------------------------------
21134 // For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
21135 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21136 // C (xmm2) is set to 0x7FFFFFFF
21137 // Wherever xmm2 is 0, we want to pick from B (sign)
21138 // Wherever xmm2 is 1, we want to pick from A (src)
21139 //
21140 // A B C Result
21141 // 0 0 0 0
21142 // 0 0 1 0
21143 // 0 1 0 1
21144 // 0 1 1 0
21145 // 1 0 0 0
21146 // 1 0 1 1
21147 // 1 1 0 1
21148 // 1 1 1 1
21149 //
21150 // Result going from high bit to low bit is 0b11100100 = 0xE4
21151 // ---------------------------------------
21152
// Scalar float copySign. dst is both the first input and the result; src is
// the second input of CopySignF. tmp1 (XMM) and tmp2 (GPR) are TEMP.
// The rule has no predicate of its own.
21153 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
21154 match(Set dst (CopySignF dst src));
21155 effect(TEMP tmp1, TEMP tmp2);
21156 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21157 ins_encode %{
// Build the selector 0x7FFFFFFF (every bit except bit 31) in a GPR and move
// it into tmp1.
21158 __ movl($tmp2$$Register, 0x7FFFFFFF);
21159 __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
// 0xE4 is the ternary-logic immediate derived in the truth table preceding
// this rule: selector bit 1 picks the dst bit, selector bit 0 picks the src bit.
21160 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21161 %}
21162 ins_pipe( pipe_slow );
21163 %}
21164
// Scalar double copySign. Matches CopySignD with inputs dst and
// (Binary src zero), where zero is an immD operand that the encoding does not
// reference. dst is both the first input and the result. tmp1 (XMM) and tmp2
// (GPR) are TEMP. Carries ins_cost(100); the rule has no predicate of its own.
21165 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
21166 match(Set dst (CopySignD dst (Binary src zero)));
21167 ins_cost(100);
21168 effect(TEMP tmp1, TEMP tmp2);
21169 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
21170 ins_encode %{
// Build the selector 0x7FFFFFFFFFFFFFFF (every bit except bit 63) in a GPR
// and move it into tmp1.
21171 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
21172 __ movq($tmp1$$XMMRegister, $tmp2$$Register);
// 0xE4 is the ternary-logic immediate derived in the truth table preceding
// the copySign rules: selector bit 1 picks the dst bit, 0 picks the src bit.
21173 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
21174 %}
21175 ins_pipe( pipe_slow );
21176 %}
21177
21178 //----------------------------- CompressBits/ExpandBits ------------------------
21179
// Int CompressBits with the mask in a register; emits pextl.
// The predicate restricts the rule to nodes whose bottom type is an int.
21180 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21181 predicate(n->bottom_type()->isa_int());
21182 match(Set dst (CompressBits src mask));
21183 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21184 ins_encode %{
21185 __ pextl($dst$$Register, $src$$Register, $mask$$Register);
21186 %}
21187 ins_pipe( pipe_slow );
21188 %}
21189
// Int ExpandBits with the mask in a register; emits pdepl.
// The predicate restricts the rule to nodes whose bottom type is an int.
21190 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
21191 predicate(n->bottom_type()->isa_int());
21192 match(Set dst (ExpandBits src mask));
21193 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21194 ins_encode %{
21195 __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
21196 %}
21197 ins_pipe( pipe_slow );
21198 %}
21199
// Int CompressBits with the mask folded from memory (LoadI); emits pextl
// with an address operand. The predicate restricts the rule to nodes whose
// bottom type is an int.
21200 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21201 predicate(n->bottom_type()->isa_int());
21202 match(Set dst (CompressBits src (LoadI mask)));
21203 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
21204 ins_encode %{
21205 __ pextl($dst$$Register, $src$$Register, $mask$$Address);
21206 %}
21207 ins_pipe( pipe_slow );
21208 %}
21209
// Int ExpandBits with the mask folded from memory (LoadI); emits pdepl with
// an address operand. The predicate restricts the rule to nodes whose bottom
// type is an int.
21210 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
21211 predicate(n->bottom_type()->isa_int());
21212 match(Set dst (ExpandBits src (LoadI mask)));
21213 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
21214 ins_encode %{
21215 __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
21216 %}
21217 ins_pipe( pipe_slow );
21218 %}
21219
21220 // --------------------------------- Sqrt --------------------------------------
21221
// Float vector square root, register form; emits vsqrtps.
// The rule has no predicate; the encoding asserts UseAVX > 0 instead.
21222 instruct vsqrtF_reg(vec dst, vec src) %{
21223 match(Set dst (SqrtVF src));
21224 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
21225 ins_encode %{
21226 assert(UseAVX > 0, "required");
21227 int vlen_enc = vector_length_encoding(this);
21228 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21229 %}
21230 ins_pipe( pipe_slow );
21231 %}
21232
// Float vector square root with the operand folded from memory (LoadVector);
// emits vsqrtps with an address operand. Requires the vector at n->in(1) to
// be wider than 8 bytes; the encoding asserts UseAVX > 0.
// NOTE(review): the "> 8 bytes" restriction presumably prevents the memory
// operand form from being used for short vectors - confirm.
21233 instruct vsqrtF_mem(vec dst, memory mem) %{
21234 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
21235 match(Set dst (SqrtVF (LoadVector mem)));
21236 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
21237 ins_encode %{
21238 assert(UseAVX > 0, "required");
21239 int vlen_enc = vector_length_encoding(this);
21240 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
21241 %}
21242 ins_pipe( pipe_slow );
21243 %}
21244
// Packed-double vector square root, register source (SqrtVD).
// The encoding asserts that AVX is enabled.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister input  = $src$$XMMRegister;
    // The vector length encoding is derived from this node's vector size.
    __ vsqrtpd(result, input, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21256
// Packed-double vector square root with the LoadVector folded in as a memory
// operand. Only selected when the input vector is wider than 8 bytes.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    const XMMRegister result = $dst$$XMMRegister;
    __ vsqrtpd(result, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21268
21269 // ------------------------------ Shift ---------------------------------------
21270
// Materialize a scalar shift count in a vector register.
// On x86 the left and right shift count vectors are identical (only the
// lowest bits of the xmm register are used as the count), so one rule serves
// both LShiftCntV and RShiftCntV.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    const XMMRegister count_vec = $dst$$XMMRegister;
    const Register    count_gpr = $cnt$$Register;
    __ movdl(count_vec, count_gpr);
  %}
  ins_pipe( pipe_slow );
%}
21282
// Byte vector shift by a uniform (non-variable) count, at most 8 elements.
// The bytes are widened to words in tmp, shifted as words, masked with the
// short-to-byte mask and packed back into dst.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int  ideal_op  = this->ideal_Opcode();
    // Every op except the unsigned right shift requests the signed extension.
    const bool is_signed = (ideal_op != Op_URShiftVB);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister words  = $tmp$$XMMRegister;

    // Widen into the temp and shift there.
    __ vextendbw(is_signed, words, $src$$XMMRegister);
    __ vshiftw(ideal_op, words, $shift$$XMMRegister);

    // Load the mask into dst, apply it to the shifted words, then pack.
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(result, words);
    __ packuswb(result, result);
  %}
  ins_pipe( pipe_slow );
%}
21303
// 16-element byte vector shift by a uniform count when UseAVX <= 1.
// The source is processed as two groups of words (tmp1 from src directly,
// tmp2 from src after a pshufd with selector 0xE); both are shifted, masked
// with the short-to-byte mask and packed together into dst.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const int  ideal_op  = this->ideal_Opcode();
    // Every op except the unsigned right shift requests the signed extension.
    const bool is_signed = (ideal_op != Op_URShiftVB);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister source  = $src$$XMMRegister;
    const XMMRegister count   = $shift$$XMMRegister;
    const XMMRegister words_a = $tmp1$$XMMRegister;
    const XMMRegister words_b = $tmp2$$XMMRegister;

    // First group: widen src directly and shift.
    __ vextendbw(is_signed, words_a, source);
    __ vshiftw(ideal_op, words_a, count);

    // Second group: shuffle src (selector 0xE), then widen and shift.
    __ pshufd(words_b, source, 0xE);
    __ vextendbw(is_signed, words_b, words_b);
    __ vshiftw(ideal_op, words_b, count);

    // dst holds the mask first; mask both groups and pack them into dst.
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(words_b, result);
    __ pand(result, words_a);
    __ packuswb(result, words_b);
  %}
  ins_pipe( pipe_slow );
%}
21328
// 16-element byte vector shift by a uniform count when UseAVX > 1.
// All 16 bytes are widened into one 256-bit register of words, shifted and
// masked there, then the two 128-bit halves are packed back into dst.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    const int  ideal_op  = this->ideal_Opcode();
    // Every op except the unsigned right shift requests the signed extension.
    const bool is_signed = (ideal_op != Op_URShiftVB);
    const int  enc256    = Assembler::AVX_256bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister words  = $tmp$$XMMRegister;

    __ vextendbw(is_signed, words, $src$$XMMRegister, enc256);
    __ vshiftw(ideal_op, words, words, $shift$$XMMRegister, enc256);
    __ vpand(words, words, ExternalAddress(vector_short_to_byte_mask()), enc256, noreg);

    // Upper 128 bits of the word vector go to dst, then both halves are
    // packed with a 128-bit encoding.
    __ vextracti128_high(result, words);
    __ vpackuswb(result, words, result, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21349
// 32-element byte vector shift by a uniform count (asserts UseAVX > 1).
// The upper 128 bits of src are widened into tmp and src itself is widened
// into dst; both are shifted as words, masked, packed and finally permuted
// (vpermq selector 0xD8) into dst.
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    const int  ideal_op  = this->ideal_Opcode();
    // Every op except the unsigned right shift requests the signed extension.
    const bool is_signed = (ideal_op != Op_URShiftVB);
    const int  enc256    = Assembler::AVX_256bit;
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    const XMMRegister upper  = $tmp$$XMMRegister;

    // Widen: upper 128 bits of src into tmp, src itself into dst.
    __ vextracti128_high(upper, source);
    __ vextendbw(is_signed, upper, upper, enc256);
    __ vextendbw(is_signed, result, source, enc256);

    // Shift both word vectors by the same count.
    __ vshiftw(ideal_op, upper, upper, count, enc256);
    __ vshiftw(ideal_op, result, result, count, enc256);

    // Mask both, pack into dst and permute the packed quadwords.
    __ vpand(upper, upper, ExternalAddress(vector_short_to_byte_mask()), enc256, noreg);
    __ vpand(result, result, ExternalAddress(vector_short_to_byte_mask()), enc256, noreg);
    __ vpackuswb(result, result, upper, enc256);
    __ vpermq(result, result, 0xD8, enc256);
  %}
  ins_pipe( pipe_slow );
%}
21374
// 64-element byte vector shift by a uniform (non-variable) count.
// The encoding asserts UseAVX > 2 and uses 512-bit operations throughout.
// dst is used first as the register holding the broadcast short-to-byte mask
// and only afterwards as the destination of the packed result.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    // Every op except the unsigned right shift requests the signed extension.
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    // Widen: the 256-bit lane selected by index 1 of src goes through tmp1,
    // src itself is widened into tmp2.
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    // Shift both word vectors by the same count.
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    // Load the short-to-byte mask into dst, broadcast it, and mask both halves.
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    // Pack the words back to bytes in dst, then reorder them with the byte
    // permutation mask (tmp2 is reused to hold that mask).
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21402
// Shorts/Chars vector shift by a uniform count (left, arithmetic right and
// logical right).
// A logical right shift of a short vector produces an incorrect Java result
// for negative data, because Java code converts the short value to int with
// sign extension before the shift. Char vectors are fine since chars are
// unsigned values.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftw(ideal_op, result, source, count, vector_length_encoding(this));
    } else {
      // Two-operand form: copy src into dst with a move chosen by the
      // element count, then shift dst in place.
      const int num_elems = Matcher::vector_length(this);
      if (num_elems == 2) {
        __ movflt(result, source);
      } else if (num_elems == 4) {
        __ movdbl(result, source);
      } else {
        assert (num_elems == 8, "sanity");
        __ movdqu(result, source);
      }
      __ vshiftw(ideal_op, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21437
// Integers vector shift by a uniform count (left, arithmetic right and
// logical right).
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftd(ideal_op, result, source, count, vector_length_encoding(this));
    } else {
      // Two-operand form: copy src into dst with a move chosen by the
      // element count, then shift dst in place.
      const int num_elems = Matcher::vector_length(this);
      if (num_elems == 2) {
        __ movdbl(result, source);
      } else {
        assert(num_elems == 4, "sanity");
        __ movdqu(result, source);
      }
      __ vshiftd(ideal_op, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21465
// Integers vector shift by an 8-bit immediate count (left, arithmetic right
// and logical right). The shift-count node is folded into the match.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftd_imm(ideal_op, result, source, $shift$$constant, vector_length_encoding(this));
    } else {
      // Two-operand form: copy src into dst with a move chosen by the
      // element count, then shift dst in place.
      const int num_elems = Matcher::vector_length(this);
      if (num_elems == 2) {
        __ movdbl(result, source);
      } else {
        assert(num_elems == 4, "sanity");
        __ movdqu(result, source);
      }
      __ vshiftd_imm(ideal_op, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21491
// Longs vector shift by a uniform count: left and logical right only.
// The arithmetic right shift (RShiftVL) is not matched here.
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    if (UseAVX > 0) {
      __ vshiftq(ideal_op, result, source, count, vector_length_encoding(this));
    } else {
      // Without AVX only the two-element case is expected: copy, then shift
      // the destination in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, source);
      __ vshiftq(ideal_op, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21512
// Longs vector shift by an 8-bit immediate count: left and logical right
// only. The shift-count node is folded into the match.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    if (UseAVX > 0) {
      __ vshiftq_imm(ideal_op, result, source, $shift$$constant, vector_length_encoding(this));
    } else {
      // Without AVX only the two-element case is expected: copy, then shift
      // the destination in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, source);
      __ vshiftq_imm(ideal_op, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21531
21532 // -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift by a uniform count when UseAVX <= 2.
// The result is built from logical shifts: src and the long sign mask are
// both shifted right logically by the count, then dst = (dst ^ tmp) - tmp.
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const uint num_longs = Matcher::vector_length(this);
    const XMMRegister result    = $dst$$XMMRegister;
    const XMMRegister source    = $src$$XMMRegister;
    const XMMRegister count     = $shift$$XMMRegister;
    const XMMRegister sign_bits = $tmp$$XMMRegister;
    if (num_longs == 2) {
      // Two-operand forms: copy src into dst first.
      __ movdqu(result, source);
      __ psrlq(result, count);
      __ movdqu(sign_bits, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(sign_bits, count);
      __ pxor(result, sign_bits);
      __ psubq(result, sign_bits);
    } else {
      assert(num_longs == 4, "sanity");
      assert(UseAVX > 1, "required");
      const int enc256 = Assembler::AVX_256bit;
      __ vpsrlq(result, source, count, enc256);
      __ vmovdqu(sign_bits, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(sign_bits, sign_bits, count, enc256);
      __ vpxor(result, result, sign_bits, enc256);
      __ vpsubq(result, result, sign_bits, enc256);
    }
  %}
  ins_pipe( pipe_slow );
%}
21561
// Long vector arithmetic right shift by a uniform count when UseAVX > 2:
// a single evpsraq at the node's vector length.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister count  = $shift$$XMMRegister;
    __ evpsraq(result, source, count, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21572
21573 // ------------------- Variable Shift -----------------------------
// Byte variable (per-element count) shift, at most 8 elements, for CPUs
// without AVX512BW. The encoding asserts UseAVX >= 2.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc128   = Assembler::AVX_128bit;
    const XMMRegister result = $dst$$XMMRegister;

    // Shift via the byte-to-word helper, then pack the result in dst with
    // itself using a 128-bit encoding.
    __ varshiftbw(ideal_op, result, $src$$XMMRegister, $shift$$XMMRegister, enc128, $vtmp$$XMMRegister);
    __ vpackuswb(result, result, result, enc128);
  %}
  ins_pipe( pipe_slow );
%}
21594
// 16-element byte variable shift for CPUs without AVX512BW.
// The lower and upper halves are shifted separately into word results and
// then merged and narrowed to bytes in dst. The encoding asserts UseAVX >= 2.
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc128   = Assembler::AVX_128bit;
    const XMMRegister result     = $dst$$XMMRegister;
    const XMMRegister source     = $src$$XMMRegister;
    const XMMRegister counts     = $shift$$XMMRegister;
    const XMMRegister upper_data = $vtmp1$$XMMRegister;
    const XMMRegister upper_cnt  = $vtmp2$$XMMRegister;

    // Lower half: word result goes to dst (vtmp1 is only scratch here).
    __ varshiftbw(ideal_op, result, source, counts, enc128, upper_data);

    // Upper half: bring the upper parts of src and shift down with a shuffle
    // (selector 0xE), then shift; the word result ends up in vtmp1.
    __ vpshufd(upper_data, source, 0xE, enc128);
    __ vpshufd(upper_cnt, counts, 0xE, enc128);
    __ varshiftbw(ideal_op, upper_data, upper_data, upper_cnt, enc128, upper_cnt);

    // Merge the two word results and down-convert them to bytes in dst.
    __ vpackuswb(result, result, upper_data, enc128);
  %}
  ins_pipe( pipe_slow );
%}
21622
// 32-element byte variable (per-element count) shift for CPUs without
// AVX512BW. The encoding asserts UseAVX >= 2.
// The vector is processed as two 128-bit parts; each part is in turn shifted
// as two halves whose word results are packed back to bytes. The temps are
// reused across steps, so the statement order below matters.
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    // (first half directly from src/shift, second half after a shuffle with
    // selector 0xE into vtmp1/vtmp2; the two word results are packed in dst).
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    // (vtmp1/vtmp2 hold the extracted upper parts of src/shift; vtmp4 is the
    // scratch register for the first varshiftbw; the packed bytes of this
    // part end up in vtmp1).
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    // (vtmp1 is inserted at 128-bit lane index 1 of dst).
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21658
// Byte variable shift of up to 32 elements on CPUs with AVX512BW.
// Delegates to the evarshiftb helper at the node's vector length; the
// encoding asserts UseAVX > 2.
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister source  = $src$$XMMRegister;
    const XMMRegister counts  = $shift$$XMMRegister;
    const XMMRegister scratch = $vtmp$$XMMRegister;
    __ evarshiftb(ideal_op, result, source, counts, vector_length_encoding(this), scratch);
  %}
  ins_pipe( pipe_slow );
%}
21677
// 64-element byte variable shift on CPUs with AVX512BW.
// Each 256-bit half is shifted with evarshiftb at a 256-bit encoding, and
// the second result is inserted at lane index 1 of dst. The encoding asserts
// UseAVX > 2.
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const int enc256   = Assembler::AVX_256bit;
    const XMMRegister result     = $dst$$XMMRegister;
    const XMMRegister source     = $src$$XMMRegister;
    const XMMRegister counts     = $shift$$XMMRegister;
    const XMMRegister upper_data = $vtmp1$$XMMRegister;
    const XMMRegister upper_cnt  = $vtmp2$$XMMRegister;

    // First half straight from src/shift; vtmp1 is scratch for this call.
    __ evarshiftb(ideal_op, result, source, counts, enc256, upper_data);

    // Second half: extract the high 256 bits of src and shift, then shift
    // them, reusing vtmp2 as scratch.
    __ vextracti64x4_high(upper_data, source);
    __ vextracti64x4_high(upper_cnt, counts);
    __ evarshiftb(ideal_op, upper_data, upper_data, upper_cnt, enc256, upper_cnt);

    // Insert the second result into dst at lane index 1.
    __ vinserti64x4(result, result, upper_data, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21700
// Short variable (per-element count) shift, at most 8 elements, for CPUs
// without AVX512BW. Despite the "left" in the format string, this rule
// matches the left, arithmetic right and logical right shifts.
// The shorts and counts are widened to ints, shifted with varshiftd, masked
// with the int-to-short mask and packed back. The encoding asserts
// UseAVX >= 2.
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int  ideal_op  = this->ideal_Opcode();
    // Every op except the unsigned right shift requests the signed extension.
    const bool is_signed = (ideal_op != Op_URShiftVS);
    const int  enc256    = Assembler::AVX_256bit;
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister scratch = $vtmp$$XMMRegister;

    // Widen data into dst and counts (zero-extended) into vtmp.
    __ vextendwd(is_signed, result, $src$$XMMRegister, enc256);
    __ vpmovzxwd(scratch, $shift$$XMMRegister, enc256);

    // Shift as ints and mask the results.
    __ varshiftd(ideal_op, result, result, scratch, enc256);
    __ vpand(result, result, ExternalAddress(vector_int_to_short_mask()), enc256, noreg);

    // Pack the low and high 128-bit parts of dst with a 128-bit encoding.
    __ vextracti128_high(scratch, result);
    __ vpackusdw(result, result, scratch, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21726
// 16-element short variable shift for CPUs without AVX512BW. Despite the
// "left" in the format string, this rule matches the left, arithmetic right
// and logical right shifts.
// The lower and upper halves are widened to ints, shifted and masked
// separately, then packed and permuted into dst. The encoding asserts
// UseAVX >= 2.
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int  ideal_op  = this->ideal_Opcode();
    // Every op except the unsigned right shift requests the signed extension.
    const bool is_signed = (ideal_op != Op_URShiftVS);
    const int  enc256    = Assembler::AVX_256bit;
    const XMMRegister result     = $dst$$XMMRegister;
    const XMMRegister source     = $src$$XMMRegister;
    const XMMRegister counts     = $shift$$XMMRegister;
    const XMMRegister wide_cnt   = $vtmp1$$XMMRegister;
    const XMMRegister lower_ints = $vtmp2$$XMMRegister;

    // Lower half: result in vtmp2, with vtmp1 holding the widened counts.
    __ vextendwd(is_signed, lower_ints, source, enc256);
    __ vpmovzxwd(wide_cnt, counts, enc256);
    __ varshiftd(ideal_op, lower_ints, lower_ints, wide_cnt, enc256);
    __ vpand(lower_ints, lower_ints, ExternalAddress(vector_int_to_short_mask()), enc256, noreg);

    // Upper half: result in dst, with vtmp1 again holding the widened counts.
    __ vextracti128_high(result, source);
    __ vextracti128_high(wide_cnt, counts);
    __ vextendwd(is_signed, result, result, enc256);
    __ vpmovzxwd(wide_cnt, wide_cnt, enc256);
    __ varshiftd(ideal_op, result, result, wide_cnt, enc256);
    __ vpand(result, result, ExternalAddress(vector_int_to_short_mask()), enc256, noreg);

    // Merge both halves into dst and permute the packed quadwords (0xD8).
    __ vpackusdw(result, lower_ints, result, enc256);
    __ vpermq(result, result, 0xD8, enc256);
  %}
  ins_pipe( pipe_slow );
%}
21762
// Short variable shift on CPUs with AVX512BW. The predicate places no
// restriction on the vector length. The encoding asserts UseAVX > 2.
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    const int ideal_op = this->ideal_Opcode();
    // Without AVX512VL the operation is always encoded at 512 bits;
    // otherwise the node's own vector length is used.
    const int vlen_enc = VM_Version::supports_avx512vl()
                           ? vector_length_encoding(this)
                           : Assembler::AVX_512bit;
    __ varshiftw(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21782
// Integer variable (per-element count) shift: left, arithmetic right and
// logical right. The encoding asserts UseAVX >= 2.
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    __ varshiftd(ideal_op, result, source, counts, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21799
// Long variable (per-element count) shift: left and logical right only.
// The arithmetic right shift (RShiftVL) is not matched here. The encoding
// asserts UseAVX >= 2.
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int ideal_op = this->ideal_Opcode();
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    const XMMRegister counts = $shift$$XMMRegister;
    __ varshiftq(ideal_op, result, source, counts, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21815
// Long variable arithmetic right shift for UseAVX == 2 and at most 4
// elements. Uses the varshiftq overload that takes an extra temp register.
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vlen_enc = vector_length_encoding(this);
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister source  = $src$$XMMRegister;
    const XMMRegister counts  = $shift$$XMMRegister;
    const XMMRegister scratch = $vtmp$$XMMRegister;
    __ varshiftq(ideal_op, result, source, counts, vlen_enc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
21832
// Long variable arithmetic right shift when UseAVX > 2.
// No temp register is needed: varshiftq is called directly at the node's
// vector length.
// The format string previously read "vector_varfshift_long" (stray 'f'); it
// now matches the spelling used by the other long variable-shift rules.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21845
21846 // --------------------------------- AND --------------------------------------
21847
// Vector AND without AVX (UseAVX == 0): two-operand pand, where dst is both
// the first input and the result.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister other = $src$$XMMRegister;
    __ pand(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21857
// Vector AND with AVX (UseAVX > 0): three-operand vpand on registers.
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    __ vpand(result, lhs, rhs, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21868
// Vector AND with AVX where the second input is a LoadVector folded in as a
// memory operand. Only selected when the first input is wider than 8 bytes.
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    __ vpand(result, lhs, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21880
21881 // --------------------------------- OR ---------------------------------------
21882
// Vector OR without AVX (UseAVX == 0): two-operand por, where dst is both
// the first input and the result.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister other = $src$$XMMRegister;
    __ por(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21892
// Vector OR with AVX (UseAVX > 0): three-operand vpor on registers.
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    __ vpor(result, lhs, rhs, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21903
// Vector OR with AVX where the second input is a LoadVector folded in as a
// memory operand. Only selected when the first input is wider than 8 bytes.
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    __ vpor(result, lhs, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21915
21916 // --------------------------------- XOR --------------------------------------
21917
// Vector XOR without AVX (UseAVX == 0): two-operand pxor, where dst is both
// the first input and the result.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    const XMMRegister accum = $dst$$XMMRegister;
    const XMMRegister other = $src$$XMMRegister;
    __ pxor(accum, other);
  %}
  ins_pipe( pipe_slow );
%}
21927
// Vector XOR with AVX (UseAVX > 0): three-operand vpxor on registers.
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src1$$XMMRegister;
    const XMMRegister rhs    = $src2$$XMMRegister;
    __ vpxor(result, lhs, rhs, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21938
// Vector XOR with AVX where the second input is a LoadVector folded in as a
// memory operand. Only selected when the first input is wider than 8 bytes.
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister lhs    = $src$$XMMRegister;
    __ vpxor(result, lhs, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21950
21951 // --------------------------------- VectorCast --------------------------------------
21952
// Cast a byte vector to another element type (VectorCastB2X).
// Handles every target type when AVX512VL is available; otherwise every
// target type except T_DOUBLE.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    // The target element type and length encoding both come from this node.
    const BasicType target_bt = Matcher::vector_element_basic_type(this);
    const int       vlen_enc  = vector_length_encoding(this);
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    __ vconvert_b2x(target_bt, result, source, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21964
// Cast a byte vector to a double vector when AVX512VL is not available.
// Both operands use the legVec operand class instead of vec.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;
    const XMMRegister source = $src$$XMMRegister;
    __ vconvert_b2x(T_DOUBLE, result, source, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21975
21976 instruct castStoX(vec dst, vec src) %{
21977 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21978 Matcher::vector_length(n->in(1)) <= 8 && // src
21979 Matcher::vector_element_basic_type(n) == T_BYTE);
21980 match(Set dst (VectorCastS2X src));
21981 format %{ "vector_cast_s2x $dst,$src" %}
21982 ins_encode %{
21983 assert(UseAVX > 0, "required");
21984
21985 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21986 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21987 %}
21988 ins_pipe( pipe_slow );
21989 %}
21990
21991 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
21992 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21993 Matcher::vector_length(n->in(1)) == 16 && // src
21994 Matcher::vector_element_basic_type(n) == T_BYTE);
21995 effect(TEMP dst, TEMP vtmp);
21996 match(Set dst (VectorCastS2X src));
21997 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
21998 ins_encode %{
21999 assert(UseAVX > 0, "required");
22000
22001 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22002 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22003 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22004 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22005 %}
22006 ins_pipe( pipe_slow );
22007 %}
22008
22009 instruct vcastStoX_evex(vec dst, vec src) %{
22010 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22011 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22012 match(Set dst (VectorCastS2X src));
22013 format %{ "vector_cast_s2x $dst,$src\t!" %}
22014 ins_encode %{
22015 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22016 int src_vlen_enc = vector_length_encoding(this, $src);
22017 int vlen_enc = vector_length_encoding(this);
22018 switch (to_elem_bt) {
22019 case T_BYTE:
22020 if (!VM_Version::supports_avx512vl()) {
22021 vlen_enc = Assembler::AVX_512bit;
22022 }
22023 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22024 break;
22025 case T_INT:
22026 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22027 break;
22028 case T_FLOAT:
22029 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22030 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22031 break;
22032 case T_LONG:
22033 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22034 break;
22035 case T_DOUBLE: {
22036 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22037 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22038 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22039 break;
22040 }
22041 default:
22042 ShouldNotReachHere();
22043 }
22044 %}
22045 ins_pipe( pipe_slow );
22046 %}
22047
22048 instruct castItoX(vec dst, vec src) %{
22049 predicate(UseAVX <= 2 &&
22050 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22051 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22052 match(Set dst (VectorCastI2X src));
22053 format %{ "vector_cast_i2x $dst,$src" %}
22054 ins_encode %{
22055 assert(UseAVX > 0, "required");
22056
22057 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22058 int vlen_enc = vector_length_encoding(this, $src);
22059
22060 if (to_elem_bt == T_BYTE) {
22061 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22062 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22063 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22064 } else {
22065 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22066 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22067 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22068 }
22069 %}
22070 ins_pipe( pipe_slow );
22071 %}
22072
22073 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22074 predicate(UseAVX <= 2 &&
22075 (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22076 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22077 match(Set dst (VectorCastI2X src));
22078 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22079 effect(TEMP dst, TEMP vtmp);
22080 ins_encode %{
22081 assert(UseAVX > 0, "required");
22082
22083 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22084 int vlen_enc = vector_length_encoding(this, $src);
22085
22086 if (to_elem_bt == T_BYTE) {
22087 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22088 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22089 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22090 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22091 } else {
22092 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22093 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22094 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22095 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22096 }
22097 %}
22098 ins_pipe( pipe_slow );
22099 %}
22100
22101 instruct vcastItoX_evex(vec dst, vec src) %{
22102 predicate(UseAVX > 2 ||
22103 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22104 match(Set dst (VectorCastI2X src));
22105 format %{ "vector_cast_i2x $dst,$src\t!" %}
22106 ins_encode %{
22107 assert(UseAVX > 0, "required");
22108
22109 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22110 int src_vlen_enc = vector_length_encoding(this, $src);
22111 int dst_vlen_enc = vector_length_encoding(this);
22112 switch (dst_elem_bt) {
22113 case T_BYTE:
22114 if (!VM_Version::supports_avx512vl()) {
22115 src_vlen_enc = Assembler::AVX_512bit;
22116 }
22117 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22118 break;
22119 case T_SHORT:
22120 if (!VM_Version::supports_avx512vl()) {
22121 src_vlen_enc = Assembler::AVX_512bit;
22122 }
22123 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22124 break;
22125 case T_FLOAT:
22126 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22127 break;
22128 case T_LONG:
22129 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22130 break;
22131 case T_DOUBLE:
22132 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22133 break;
22134 default:
22135 ShouldNotReachHere();
22136 }
22137 %}
22138 ins_pipe( pipe_slow );
22139 %}
22140
22141 instruct vcastLtoBS(vec dst, vec src) %{
22142 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22143 UseAVX <= 2);
22144 match(Set dst (VectorCastL2X src));
22145 format %{ "vector_cast_l2x $dst,$src" %}
22146 ins_encode %{
22147 assert(UseAVX > 0, "required");
22148
22149 int vlen = Matcher::vector_length_in_bytes(this, $src);
22150 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22151 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22152 : ExternalAddress(vector_int_to_short_mask());
22153 if (vlen <= 16) {
22154 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22155 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22156 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22157 } else {
22158 assert(vlen <= 32, "required");
22159 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22160 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22161 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22162 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22163 }
22164 if (to_elem_bt == T_BYTE) {
22165 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22166 }
22167 %}
22168 ins_pipe( pipe_slow );
22169 %}
22170
22171 instruct vcastLtoX_evex(vec dst, vec src) %{
22172 predicate(UseAVX > 2 ||
22173 (Matcher::vector_element_basic_type(n) == T_INT ||
22174 Matcher::vector_element_basic_type(n) == T_FLOAT ||
22175 Matcher::vector_element_basic_type(n) == T_DOUBLE));
22176 match(Set dst (VectorCastL2X src));
22177 format %{ "vector_cast_l2x $dst,$src\t!" %}
22178 ins_encode %{
22179 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22180 int vlen = Matcher::vector_length_in_bytes(this, $src);
22181 int vlen_enc = vector_length_encoding(this, $src);
22182 switch (to_elem_bt) {
22183 case T_BYTE:
22184 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22185 vlen_enc = Assembler::AVX_512bit;
22186 }
22187 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22188 break;
22189 case T_SHORT:
22190 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22191 vlen_enc = Assembler::AVX_512bit;
22192 }
22193 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22194 break;
22195 case T_INT:
22196 if (vlen == 8) {
22197 if ($dst$$XMMRegister != $src$$XMMRegister) {
22198 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22199 }
22200 } else if (vlen == 16) {
22201 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22202 } else if (vlen == 32) {
22203 if (UseAVX > 2) {
22204 if (!VM_Version::supports_avx512vl()) {
22205 vlen_enc = Assembler::AVX_512bit;
22206 }
22207 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22208 } else {
22209 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22210 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22211 }
22212 } else { // vlen == 64
22213 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22214 }
22215 break;
22216 case T_FLOAT:
22217 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22218 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22219 break;
22220 case T_DOUBLE:
22221 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22222 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22223 break;
22224
22225 default: assert(false, "%s", type2name(to_elem_bt));
22226 }
22227 %}
22228 ins_pipe( pipe_slow );
22229 %}
22230
22231 instruct vcastFtoD_reg(vec dst, vec src) %{
22232 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22233 match(Set dst (VectorCastF2X src));
22234 format %{ "vector_cast_f2d $dst,$src\t!" %}
22235 ins_encode %{
22236 int vlen_enc = vector_length_encoding(this);
22237 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22238 %}
22239 ins_pipe( pipe_slow );
22240 %}
22241
22242
22243 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22244 predicate(!VM_Version::supports_avx10_2() &&
22245 !VM_Version::supports_avx512vl() &&
22246 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22247 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22248 is_integral_type(Matcher::vector_element_basic_type(n)));
22249 match(Set dst (VectorCastF2X src));
22250 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22251 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22252 ins_encode %{
22253 int vlen_enc = vector_length_encoding(this, $src);
22254 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22255 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
22256 // 32 bit addresses for register indirect addressing mode since stub constants
22257 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
22258 // However, targets are free to increase this limit, but having a large code cache size
22259 // greater than 2G looks unreasonable in practical scenario, on the hind side with given
22260 // cap we save a temporary register allocation which in limiting case can prevent
22261 // spilling in high register pressure blocks.
22262 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22263 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22264 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22265 %}
22266 ins_pipe( pipe_slow );
22267 %}
22268
22269 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22270 predicate(!VM_Version::supports_avx10_2() &&
22271 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22272 is_integral_type(Matcher::vector_element_basic_type(n)));
22273 match(Set dst (VectorCastF2X src));
22274 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22275 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22276 ins_encode %{
22277 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22278 if (to_elem_bt == T_LONG) {
22279 int vlen_enc = vector_length_encoding(this);
22280 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22281 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22282 ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22283 } else {
22284 int vlen_enc = vector_length_encoding(this, $src);
22285 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22286 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22287 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22288 }
22289 %}
22290 ins_pipe( pipe_slow );
22291 %}
22292
22293 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22294 predicate(VM_Version::supports_avx10_2() &&
22295 is_integral_type(Matcher::vector_element_basic_type(n)));
22296 match(Set dst (VectorCastF2X src));
22297 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22298 ins_encode %{
22299 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22300 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22301 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22302 %}
22303 ins_pipe( pipe_slow );
22304 %}
22305
22306 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22307 predicate(VM_Version::supports_avx10_2() &&
22308 is_integral_type(Matcher::vector_element_basic_type(n)));
22309 match(Set dst (VectorCastF2X (LoadVector src)));
22310 format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22311 ins_encode %{
22312 int vlen = Matcher::vector_length(this);
22313 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22314 int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22315 __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22316 %}
22317 ins_pipe( pipe_slow );
22318 %}
22319
22320 instruct vcastDtoF_reg(vec dst, vec src) %{
22321 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22322 match(Set dst (VectorCastD2X src));
22323 format %{ "vector_cast_d2x $dst,$src\t!" %}
22324 ins_encode %{
22325 int vlen_enc = vector_length_encoding(this, $src);
22326 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22327 %}
22328 ins_pipe( pipe_slow );
22329 %}
22330
22331 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22332 predicate(!VM_Version::supports_avx10_2() &&
22333 !VM_Version::supports_avx512vl() &&
22334 Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22335 is_integral_type(Matcher::vector_element_basic_type(n)));
22336 match(Set dst (VectorCastD2X src));
22337 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22338 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22339 ins_encode %{
22340 int vlen_enc = vector_length_encoding(this, $src);
22341 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22342 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22343 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22344 ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22345 %}
22346 ins_pipe( pipe_slow );
22347 %}
22348
22349 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22350 predicate(!VM_Version::supports_avx10_2() &&
22351 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22352 is_integral_type(Matcher::vector_element_basic_type(n)));
22353 match(Set dst (VectorCastD2X src));
22354 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22355 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22356 ins_encode %{
22357 int vlen_enc = vector_length_encoding(this, $src);
22358 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22359 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22360 ExternalAddress(vector_float_signflip());
22361 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22362 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22363 %}
22364 ins_pipe( pipe_slow );
22365 %}
22366
22367 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22368 predicate(VM_Version::supports_avx10_2() &&
22369 is_integral_type(Matcher::vector_element_basic_type(n)));
22370 match(Set dst (VectorCastD2X src));
22371 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22372 ins_encode %{
22373 int vlen_enc = vector_length_encoding(this, $src);
22374 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22375 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22376 %}
22377 ins_pipe( pipe_slow );
22378 %}
22379
22380 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22381 predicate(VM_Version::supports_avx10_2() &&
22382 is_integral_type(Matcher::vector_element_basic_type(n)));
22383 match(Set dst (VectorCastD2X (LoadVector src)));
22384 format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22385 ins_encode %{
22386 int vlen = Matcher::vector_length(this);
22387 int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22388 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22389 __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22390 %}
22391 ins_pipe( pipe_slow );
22392 %}
22393
22394 instruct vucast(vec dst, vec src) %{
22395 match(Set dst (VectorUCastB2X src));
22396 match(Set dst (VectorUCastS2X src));
22397 match(Set dst (VectorUCastI2X src));
22398 format %{ "vector_ucast $dst,$src\t!" %}
22399 ins_encode %{
22400 assert(UseAVX > 0, "required");
22401
22402 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22403 BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22404 int vlen_enc = vector_length_encoding(this);
22405 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22406 %}
22407 ins_pipe( pipe_slow );
22408 %}
22409
22410 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22411 predicate(!VM_Version::supports_avx512vl() &&
22412 Matcher::vector_length_in_bytes(n) < 64 &&
22413 Matcher::vector_element_basic_type(n) == T_INT);
22414 match(Set dst (RoundVF src));
22415 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22416 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22417 ins_encode %{
22418 int vlen_enc = vector_length_encoding(this);
22419 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22420 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22421 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22422 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22423 %}
22424 ins_pipe( pipe_slow );
22425 %}
22426
22427 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22428 predicate((VM_Version::supports_avx512vl() ||
22429 Matcher::vector_length_in_bytes(n) == 64) &&
22430 Matcher::vector_element_basic_type(n) == T_INT);
22431 match(Set dst (RoundVF src));
22432 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22433 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22434 ins_encode %{
22435 int vlen_enc = vector_length_encoding(this);
22436 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22437 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22438 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22439 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22440 %}
22441 ins_pipe( pipe_slow );
22442 %}
22443
22444 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22445 predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22446 match(Set dst (RoundVD src));
22447 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22448 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22449 ins_encode %{
22450 int vlen_enc = vector_length_encoding(this);
22451 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22452 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22453 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22454 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22455 %}
22456 ins_pipe( pipe_slow );
22457 %}
22458
22459 // --------------------------------- VectorMaskCmp --------------------------------------
22460
22461 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22462 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22463 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
22464 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22465 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22466 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22467 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22468 ins_encode %{
22469 int vlen_enc = vector_length_encoding(this, $src1);
22470 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22471 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22472 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22473 } else {
22474 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22475 }
22476 %}
22477 ins_pipe( pipe_slow );
22478 %}
22479
22480 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22481 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22482 n->bottom_type()->isa_pvectmask() == nullptr &&
22483 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22484 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22485 effect(TEMP ktmp);
22486 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22487 ins_encode %{
22488 int vlen_enc = Assembler::AVX_512bit;
22489 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22490 KRegister mask = k0; // The comparison itself is not being masked.
22491 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22492 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22493 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22494 } else {
22495 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22496 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22497 }
22498 %}
22499 ins_pipe( pipe_slow );
22500 %}
22501
22502 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22503 predicate(n->bottom_type()->isa_pvectmask() &&
22504 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22505 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22506 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22507 ins_encode %{
22508 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22509 int vlen_enc = vector_length_encoding(this, $src1);
22510 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22511 KRegister mask = k0; // The comparison itself is not being masked.
22512 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22513 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22514 } else {
22515 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22516 }
22517 %}
22518 ins_pipe( pipe_slow );
22519 %}
22520
22521 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22522 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22523 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22524 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22525 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22526 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22527 (n->in(2)->get_int() == BoolTest::eq ||
22528 n->in(2)->get_int() == BoolTest::lt ||
22529 n->in(2)->get_int() == BoolTest::gt)); // cond
22530 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22531 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22532 ins_encode %{
22533 int vlen_enc = vector_length_encoding(this, $src1);
22534 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22535 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22536 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22537 %}
22538 ins_pipe( pipe_slow );
22539 %}
22540
22541 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22542 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22543 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22544 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22545 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22546 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22547 (n->in(2)->get_int() == BoolTest::ne ||
22548 n->in(2)->get_int() == BoolTest::le ||
22549 n->in(2)->get_int() == BoolTest::ge)); // cond
22550 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22551 effect(TEMP dst, TEMP xtmp);
22552 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22553 ins_encode %{
22554 int vlen_enc = vector_length_encoding(this, $src1);
22555 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22556 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22557 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22558 %}
22559 ins_pipe( pipe_slow );
22560 %}
22561
22562 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22563 predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22564 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22565 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
22566 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22567 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22568 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22569 effect(TEMP dst, TEMP xtmp);
22570 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22571 ins_encode %{
22572 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22573 int vlen_enc = vector_length_encoding(this, $src1);
22574 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22575 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22576
22577 if (vlen_enc == Assembler::AVX_128bit) {
22578 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22579 } else {
22580 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22581 }
22582 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22583 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22584 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22585 %}
22586 ins_pipe( pipe_slow );
22587 %}
22588
22589 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22590 predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22591 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22592 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22593 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22594 effect(TEMP ktmp);
22595 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22596 ins_encode %{
22597 assert(UseAVX > 2, "required");
22598
22599 int vlen_enc = vector_length_encoding(this, $src1);
22600 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22601 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22602 KRegister mask = k0; // The comparison itself is not being masked.
22603 bool merge = false;
22604 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22605
22606 switch (src1_elem_bt) {
22607 case T_INT: {
22608 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22609 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22610 break;
22611 }
22612 case T_LONG: {
22613 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22614 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22615 break;
22616 }
22617 default: assert(false, "%s", type2name(src1_elem_bt));
22618 }
22619 %}
22620 ins_pipe( pipe_slow );
22621 %}
22622
22623
22624 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22625 predicate(n->bottom_type()->isa_pvectmask() &&
22626 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22627 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22628 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22629 ins_encode %{
22630 assert(UseAVX > 2, "required");
22631 assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22632
22633 int vlen_enc = vector_length_encoding(this, $src1);
22634 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22635 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22636 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22637
22638 // Comparison i
22639 switch (src1_elem_bt) {
22640 case T_BYTE: {
22641 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22642 break;
22643 }
22644 case T_SHORT: {
22645 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22646 break;
22647 }
22648 case T_INT: {
22649 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22650 break;
22651 }
22652 case T_LONG: {
22653 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22654 break;
22655 }
22656 default: assert(false, "%s", type2name(src1_elem_bt));
22657 }
22658 %}
22659 ins_pipe( pipe_slow );
22660 %}
22661
22662 // Extract
22663
22664 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22665 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22666 match(Set dst (ExtractI src idx));
22667 match(Set dst (ExtractS src idx));
22668 match(Set dst (ExtractB src idx));
22669 format %{ "extractI $dst,$src,$idx\t!" %}
22670 ins_encode %{
22671 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22672
22673 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22674 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22675 %}
22676 ins_pipe( pipe_slow );
22677 %}
22678
22679 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22680 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22681 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
22682 match(Set dst (ExtractI src idx));
22683 match(Set dst (ExtractS src idx));
22684 match(Set dst (ExtractB src idx));
22685 effect(TEMP vtmp);
22686 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22687 ins_encode %{
22688 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22689
22690 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22691 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22692 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22693 %}
22694 ins_pipe( pipe_slow );
22695 %}
22696
22697 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22698 predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22699 match(Set dst (ExtractL src idx));
22700 format %{ "extractL $dst,$src,$idx\t!" %}
22701 ins_encode %{
22702 assert(UseSSE >= 4, "required");
22703 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22704
22705 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22706 %}
22707 ins_pipe( pipe_slow );
22708 %}
22709
22710 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22711 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22712 Matcher::vector_length(n->in(1)) == 8); // src
22713 match(Set dst (ExtractL src idx));
22714 effect(TEMP vtmp);
22715 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22716 ins_encode %{
22717 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22718
22719 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22720 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22721 %}
22722 ins_pipe( pipe_slow );
22723 %}
22724
22725 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22726 predicate(Matcher::vector_length(n->in(1)) <= 4);
22727 match(Set dst (ExtractF src idx));
22728 effect(TEMP dst, TEMP vtmp);
22729 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22730 ins_encode %{
22731 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22732
22733 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22734 %}
22735 ins_pipe( pipe_slow );
22736 %}
22737
22738 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22739 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22740 Matcher::vector_length(n->in(1)/*src*/) == 16);
22741 match(Set dst (ExtractF src idx));
22742 effect(TEMP vtmp);
22743 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22744 ins_encode %{
22745 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22746
22747 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22748 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22749 %}
22750 ins_pipe( pipe_slow );
22751 %}
22752
22753 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22754 predicate(Matcher::vector_length(n->in(1)) == 2); // src
22755 match(Set dst (ExtractD src idx));
22756 format %{ "extractD $dst,$src,$idx\t!" %}
22757 ins_encode %{
22758 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22759
22760 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22761 %}
22762 ins_pipe( pipe_slow );
22763 %}
22764
22765 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22766 predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22767 Matcher::vector_length(n->in(1)) == 8); // src
22768 match(Set dst (ExtractD src idx));
22769 effect(TEMP vtmp);
22770 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22771 ins_encode %{
22772 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22773
22774 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22775 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22776 %}
22777 ins_pipe( pipe_slow );
22778 %}
22779
22780 // --------------------------------- Vector Blend --------------------------------------
22781
22782 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22783 predicate(UseAVX == 0);
22784 match(Set dst (VectorBlend (Binary dst src) mask));
22785 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
22786 effect(TEMP tmp);
22787 ins_encode %{
22788 assert(UseSSE >= 4, "required");
22789
22790 if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22791 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22792 }
22793 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22794 %}
22795 ins_pipe( pipe_slow );
22796 %}
22797
22798 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22799 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22800 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22801 Matcher::vector_length_in_bytes(n) <= 32 &&
22802 is_integral_type(Matcher::vector_element_basic_type(n)));
22803 match(Set dst (VectorBlend (Binary src1 src2) mask));
22804 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22805 ins_encode %{
22806 int vlen_enc = vector_length_encoding(this);
22807 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22808 %}
22809 ins_pipe( pipe_slow );
22810 %}
22811
22812 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22813 predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22814 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22815 Matcher::vector_length_in_bytes(n) <= 32 &&
22816 !is_integral_type(Matcher::vector_element_basic_type(n)));
22817 match(Set dst (VectorBlend (Binary src1 src2) mask));
22818 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
22819 ins_encode %{
22820 int vlen_enc = vector_length_encoding(this);
22821 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22822 %}
22823 ins_pipe( pipe_slow );
22824 %}
22825
22826 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22827 predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22828 n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22829 Matcher::vector_length_in_bytes(n) <= 32);
22830 match(Set dst (VectorBlend (Binary src1 src2) mask));
22831 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22832 effect(TEMP vtmp, TEMP dst);
22833 ins_encode %{
22834 int vlen_enc = vector_length_encoding(this);
22835 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22836 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22837 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22838 %}
22839 ins_pipe( pipe_slow );
22840 %}
22841
22842 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22843 predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22844 n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
22845 match(Set dst (VectorBlend (Binary src1 src2) mask));
22846 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22847 effect(TEMP ktmp);
22848 ins_encode %{
22849 int vlen_enc = Assembler::AVX_512bit;
22850 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22851 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22852 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22853 %}
22854 ins_pipe( pipe_slow );
22855 %}
22856
22857
22858 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
22859 predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
22860 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22861 VM_Version::supports_avx512bw()));
22862 match(Set dst (VectorBlend (Binary src1 src2) mask));
22863 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22864 ins_encode %{
22865 int vlen_enc = vector_length_encoding(this);
22866 BasicType elem_bt = Matcher::vector_element_basic_type(this);
22867 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22868 %}
22869 ins_pipe( pipe_slow );
22870 %}
22871
22872 // --------------------------------- ABS --------------------------------------
22873 // a = |a|
22874 instruct vabsB_reg(vec dst, vec src) %{
22875 match(Set dst (AbsVB src));
22876 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22877 ins_encode %{
22878 uint vlen = Matcher::vector_length(this);
22879 if (vlen <= 16) {
22880 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22881 } else {
22882 int vlen_enc = vector_length_encoding(this);
22883 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22884 }
22885 %}
22886 ins_pipe( pipe_slow );
22887 %}
22888
22889 instruct vabsS_reg(vec dst, vec src) %{
22890 match(Set dst (AbsVS src));
22891 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22892 ins_encode %{
22893 uint vlen = Matcher::vector_length(this);
22894 if (vlen <= 8) {
22895 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22896 } else {
22897 int vlen_enc = vector_length_encoding(this);
22898 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22899 }
22900 %}
22901 ins_pipe( pipe_slow );
22902 %}
22903
22904 instruct vabsI_reg(vec dst, vec src) %{
22905 match(Set dst (AbsVI src));
22906 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22907 ins_encode %{
22908 uint vlen = Matcher::vector_length(this);
22909 if (vlen <= 4) {
22910 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22911 } else {
22912 int vlen_enc = vector_length_encoding(this);
22913 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22914 }
22915 %}
22916 ins_pipe( pipe_slow );
22917 %}
22918
22919 instruct vabsL_reg(vec dst, vec src) %{
22920 match(Set dst (AbsVL src));
22921 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22922 ins_encode %{
22923 assert(UseAVX > 2, "required");
22924 int vlen_enc = vector_length_encoding(this);
22925 if (!VM_Version::supports_avx512vl()) {
22926 vlen_enc = Assembler::AVX_512bit;
22927 }
22928 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22929 %}
22930 ins_pipe( pipe_slow );
22931 %}
22932
22933 // --------------------------------- ABSNEG --------------------------------------
22934
22935 instruct vabsnegF(vec dst, vec src) %{
22936 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22937 match(Set dst (AbsVF src));
22938 match(Set dst (NegVF src));
22939 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22940 ins_cost(150);
22941 ins_encode %{
22942 int opcode = this->ideal_Opcode();
22943 int vlen = Matcher::vector_length(this);
22944 if (vlen == 2) {
22945 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22946 } else {
22947 assert(vlen == 8 || vlen == 16, "required");
22948 int vlen_enc = vector_length_encoding(this);
22949 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22950 }
22951 %}
22952 ins_pipe( pipe_slow );
22953 %}
22954
22955 instruct vabsneg4F(vec dst) %{
22956 predicate(Matcher::vector_length(n) == 4);
22957 match(Set dst (AbsVF dst));
22958 match(Set dst (NegVF dst));
22959 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22960 ins_cost(150);
22961 ins_encode %{
22962 int opcode = this->ideal_Opcode();
22963 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22964 %}
22965 ins_pipe( pipe_slow );
22966 %}
22967
22968 instruct vabsnegD(vec dst, vec src) %{
22969 match(Set dst (AbsVD src));
22970 match(Set dst (NegVD src));
22971 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22972 ins_encode %{
22973 int opcode = this->ideal_Opcode();
22974 uint vlen = Matcher::vector_length(this);
22975 if (vlen == 2) {
22976 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22977 } else {
22978 int vlen_enc = vector_length_encoding(this);
22979 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22980 }
22981 %}
22982 ins_pipe( pipe_slow );
22983 %}
22984
22985 //------------------------------------- VectorTest --------------------------------------------
22986
22987 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
22988 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22989 match(Set cr (VectorTest src1 src2));
22990 effect(TEMP vtmp);
22991 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
22992 ins_encode %{
22993 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
22994 int vlen = Matcher::vector_length_in_bytes(this, $src1);
22995 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
22996 %}
22997 ins_pipe( pipe_slow );
22998 %}
22999
23000 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
23001 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23002 match(Set cr (VectorTest src1 src2));
23003 format %{ "vptest_ge16 $src1, $src2\n\t" %}
23004 ins_encode %{
23005 BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23006 int vlen = Matcher::vector_length_in_bytes(this, $src1);
23007 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23008 %}
23009 ins_pipe( pipe_slow );
23010 %}
23011
23012 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23013 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23014 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23015 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23016 match(Set cr (VectorTest src1 src2));
23017 effect(TEMP tmp);
23018 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23019 ins_encode %{
23020 uint masklen = Matcher::vector_length(this, $src1);
23021 __ kmovwl($tmp$$Register, $src1$$KRegister);
23022 __ andl($tmp$$Register, (1 << masklen) - 1);
23023 __ cmpl($tmp$$Register, (1 << masklen) - 1);
23024 %}
23025 ins_pipe( pipe_slow );
23026 %}
23027
23028 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
23029 predicate((Matcher::vector_length(n->in(1)) < 8 ||
23030 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23031 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23032 match(Set cr (VectorTest src1 src2));
23033 effect(TEMP tmp);
23034 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
23035 ins_encode %{
23036 uint masklen = Matcher::vector_length(this, $src1);
23037 __ kmovwl($tmp$$Register, $src1$$KRegister);
23038 __ andl($tmp$$Register, (1 << masklen) - 1);
23039 %}
23040 ins_pipe( pipe_slow );
23041 %}
23042
23043 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
23044 predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23045 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23046 match(Set cr (VectorTest src1 src2));
23047 format %{ "ktest_ge8 $src1, $src2\n\t" %}
23048 ins_encode %{
23049 uint masklen = Matcher::vector_length(this, $src1);
23050 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23051 %}
23052 ins_pipe( pipe_slow );
23053 %}
23054
23055 //------------------------------------- LoadMask --------------------------------------------
23056
23057 instruct loadMask(legVec dst, legVec src) %{
23058 predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23059 match(Set dst (VectorLoadMask src));
23060 effect(TEMP dst);
23061 format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23062 ins_encode %{
23063 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23064 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23065 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23066 %}
23067 ins_pipe( pipe_slow );
23068 %}
23069
23070 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
23071 predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
23072 match(Set dst (VectorLoadMask src));
23073 effect(TEMP xtmp);
23074 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23075 ins_encode %{
23076 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23077 true, Assembler::AVX_512bit);
23078 %}
23079 ins_pipe( pipe_slow );
23080 %}
23081
23082 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
23083 predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
23084 match(Set dst (VectorLoadMask src));
23085 effect(TEMP xtmp);
23086 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23087 ins_encode %{
23088 int vlen_enc = vector_length_encoding(in(1));
23089 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23090 false, vlen_enc);
23091 %}
23092 ins_pipe( pipe_slow );
23093 %}
23094
23095 //------------------------------------- StoreMask --------------------------------------------
23096
23097 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
23098 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23099 match(Set dst (VectorStoreMask src size));
23100 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23101 ins_encode %{
23102 int vlen = Matcher::vector_length(this);
23103 if (vlen <= 16 && UseAVX <= 2) {
23104 assert(UseSSE >= 3, "required");
23105 __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23106 } else {
23107 assert(UseAVX > 0, "required");
23108 int src_vlen_enc = vector_length_encoding(this, $src);
23109 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23110 }
23111 %}
23112 ins_pipe( pipe_slow );
23113 %}
23114
23115 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
23116 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23117 match(Set dst (VectorStoreMask src size));
23118 effect(TEMP_DEF dst, TEMP xtmp);
23119 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23120 ins_encode %{
23121 int vlen_enc = Assembler::AVX_128bit;
23122 int vlen = Matcher::vector_length(this);
23123 if (vlen <= 8) {
23124 assert(UseSSE >= 3, "required");
23125 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23126 __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23127 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23128 } else {
23129 assert(UseAVX > 0, "required");
23130 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23131 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23132 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23133 }
23134 %}
23135 ins_pipe( pipe_slow );
23136 %}
23137
23138 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
23139 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23140 match(Set dst (VectorStoreMask src size));
23141 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23142 effect(TEMP_DEF dst, TEMP xtmp);
23143 ins_encode %{
23144 int vlen_enc = Assembler::AVX_128bit;
23145 int vlen = Matcher::vector_length(this);
23146 if (vlen <= 4) {
23147 assert(UseSSE >= 3, "required");
23148 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23149 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23150 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23151 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23152 } else {
23153 assert(UseAVX > 0, "required");
23154 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23155 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23156 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23157 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23158 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23159 }
23160 %}
23161 ins_pipe( pipe_slow );
23162 %}
23163
23164 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
23165 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23166 match(Set dst (VectorStoreMask src size));
23167 effect(TEMP_DEF dst, TEMP xtmp);
23168 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23169 ins_encode %{
23170 assert(UseSSE >= 3, "required");
23171 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23172 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23173 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
23174 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23175 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23176 %}
23177 ins_pipe( pipe_slow );
23178 %}
23179
23180 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
23181 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23182 match(Set dst (VectorStoreMask src size));
23183 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23184 effect(TEMP_DEF dst, TEMP vtmp);
23185 ins_encode %{
23186 int vlen_enc = Assembler::AVX_128bit;
23187 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
23188 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23189 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
23190 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23191 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23192 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23193 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23194 %}
23195 ins_pipe( pipe_slow );
23196 %}
23197
23198 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
23199 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23200 match(Set dst (VectorStoreMask src size));
23201 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23202 ins_encode %{
23203 int src_vlen_enc = vector_length_encoding(this, $src);
23204 int dst_vlen_enc = vector_length_encoding(this);
23205 if (!VM_Version::supports_avx512vl()) {
23206 src_vlen_enc = Assembler::AVX_512bit;
23207 }
23208 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23209 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23210 %}
23211 ins_pipe( pipe_slow );
23212 %}
23213
23214 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
23215 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23216 match(Set dst (VectorStoreMask src size));
23217 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23218 ins_encode %{
23219 int src_vlen_enc = vector_length_encoding(this, $src);
23220 int dst_vlen_enc = vector_length_encoding(this);
23221 if (!VM_Version::supports_avx512vl()) {
23222 src_vlen_enc = Assembler::AVX_512bit;
23223 }
23224 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23225 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23226 %}
23227 ins_pipe( pipe_slow );
23228 %}
23229
23230 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
23231 predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
23232 match(Set dst (VectorStoreMask mask size));
23233 effect(TEMP_DEF dst);
23234 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23235 ins_encode %{
23236 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
23237 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23238 false, Assembler::AVX_512bit, noreg);
23239 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23240 %}
23241 ins_pipe( pipe_slow );
23242 %}
23243
23244 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
23245 predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
23246 match(Set dst (VectorStoreMask mask size));
23247 effect(TEMP_DEF dst);
23248 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23249 ins_encode %{
23250 int dst_vlen_enc = vector_length_encoding(this);
23251 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23252 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23253 %}
23254 ins_pipe( pipe_slow );
23255 %}
23256
23257 instruct vmaskcast_evex(kReg dst) %{
23258 match(Set dst (VectorMaskCast dst));
23259 ins_cost(0);
23260 format %{ "vector_mask_cast $dst" %}
23261 ins_encode %{
23262 // empty
23263 %}
23264 ins_pipe(empty);
23265 %}
23266
23267 instruct vmaskcast(vec dst) %{
23268 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23269 match(Set dst (VectorMaskCast dst));
23270 ins_cost(0);
23271 format %{ "vector_mask_cast $dst" %}
23272 ins_encode %{
23273 // empty
23274 %}
23275 ins_pipe(empty);
23276 %}
23277
23278 instruct vmaskcast_avx(vec dst, vec src) %{
23279 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23280 match(Set dst (VectorMaskCast src));
23281 format %{ "vector_mask_cast $dst, $src" %}
23282 ins_encode %{
23283 int vlen = Matcher::vector_length(this);
23284 BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23285 BasicType dst_bt = Matcher::vector_element_basic_type(this);
23286 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23287 %}
23288 ins_pipe(pipe_slow);
23289 %}
23290
23291 //-------------------------------- Load Iota Indices ----------------------------------
23292
23293 instruct loadIotaIndices(vec dst, immI_0 src) %{
23294 match(Set dst (VectorLoadConst src));
23295 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23296 ins_encode %{
23297 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23298 BasicType bt = Matcher::vector_element_basic_type(this);
23299 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23300 %}
23301 ins_pipe( pipe_slow );
23302 %}
23303
23304 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
23305 match(Set dst (PopulateIndex src1 src2));
23306 effect(TEMP dst, TEMP vtmp);
23307 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23308 ins_encode %{
23309 assert($src2$$constant == 1, "required");
23310 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23311 int vlen_enc = vector_length_encoding(this);
23312 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23313 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23314 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23315 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23316 %}
23317 ins_pipe( pipe_slow );
23318 %}
23319
23320 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
23321 match(Set dst (PopulateIndex src1 src2));
23322 effect(TEMP dst, TEMP vtmp);
23323 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23324 ins_encode %{
23325 assert($src2$$constant == 1, "required");
23326 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23327 int vlen_enc = vector_length_encoding(this);
23328 BasicType elem_bt = Matcher::vector_element_basic_type(this);
23329 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23330 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23331 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23332 %}
23333 ins_pipe( pipe_slow );
23334 %}
23335
23336 //-------------------------------- Rearrange ----------------------------------
23337
23338 // LoadShuffle/Rearrange for Byte
23339 instruct rearrangeB(vec dst, vec shuffle) %{
23340 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23341 Matcher::vector_length(n) < 32);
23342 match(Set dst (VectorRearrange dst shuffle));
23343 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23344 ins_encode %{
23345 assert(UseSSE >= 4, "required");
23346 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23347 %}
23348 ins_pipe( pipe_slow );
23349 %}
23350
// Byte rearrange for exactly 32 elements when AVX512_VBMI is not supported.
// Both the original source and a lane-swapped copy are shuffled with vpshufb
// and the two results are blended, following the step comments below.
// $dst is declared TEMP: it is written by the second vpshufb while $shuffle
// is still read afterwards, so it presumably must not share a register with
// the inputs.
23351 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23352 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23353 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23354 match(Set dst (VectorRearrange src shuffle));
23355 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23356 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23357 ins_encode %{
23358 assert(UseAVX >= 2, "required");
23359 // Swap src into vtmp1
23360 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23361 // Shuffle swapped src to get entries from other 128 bit lane
23362 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23363 // Shuffle original src to get entries from self 128 bit lane
23364 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23365 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23366 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23367 // Perform the blend
23368 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23369 %}
23370 ins_pipe( pipe_slow );
23371 %}
23372
23373
// Byte rearrange for more than 32 elements when AVX512_VBMI is not supported.
// All of the work is delegated to the rearrange_bytes macro-assembler helper,
// which receives three vector temporaries, one general-purpose temporary and
// one opmask temporary.
23374 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
23375 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23376 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23377 match(Set dst (VectorRearrange src shuffle));
23378 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
// Format uses the same "\t! using ... as TEMP" layout as the sibling rearrange rules.
23379 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23380 ins_encode %{
23381 int vlen_enc = vector_length_encoding(this);
23382 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23383 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23384 $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23385 %}
23386 ins_pipe( pipe_slow );
23387 %}
23388
// Byte rearrange for 32 or more elements when AVX512_VBMI is supported:
// a single vpermb with the encoding derived from this node's vector length.
23389 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
23390 predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23391 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23392 match(Set dst (VectorRearrange src shuffle));
23393 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23394 ins_encode %{
23395 int vlen_enc = vector_length_encoding(this);
// Operand order is (dst, shuffle, src): the index vector comes before the data.
23396 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23397 %}
23398 ins_pipe( pipe_slow );
23399 %}
23400
23401 // LoadShuffle/Rearrange for Short
23402
// Builds a byte-level shuffle from a short shuffle when AVX512BW is not
// supported. Per the step comments: each short index is doubled, the doubled
// value is replicated into both bytes of its 16-bit lane, and the constant at
// vector_short_shufflemask() is added to form the alternate byte index.
// Two code paths: legacy SSE encoding (UseAVX == 0) and VEX encoding.
23403 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
23404 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23405 !VM_Version::supports_avx512bw());
23406 match(Set dst (VectorLoadShuffle src));
23407 effect(TEMP dst, TEMP vtmp);
23408 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23409 ins_encode %{
23410 // Create a byte shuffle mask from short shuffle mask
23411 // only byte shuffle instruction available on these platforms
23412 int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23413 if (UseAVX == 0) {
// SSE path: two-operand instructions, so values are copied before shifting.
23414 assert(vlen_in_bytes <= 16, "required");
23415 // Multiply each shuffle by two to get byte index
23416 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23417 __ psllw($vtmp$$XMMRegister, 1);
23418
23419 // Duplicate to create 2 copies of byte index
23420 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23421 __ psllw($dst$$XMMRegister, 8);
23422 __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23423
23424 // Add one to get alternate byte index
23425 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23426 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23427 } else {
// AVX path: vectors wider than 16 bytes are only accepted when UseAVX > 1.
23428 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23429 int vlen_enc = vector_length_encoding(this);
23430 // Multiply each shuffle by two to get byte index
23431 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23432
23433 // Duplicate to create 2 copies of byte index
23434 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
23435 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23436
23437 // Add one to get alternate byte index
23438 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23439 }
23440 %}
23441 ins_pipe( pipe_slow );
23442 %}
23443
// In-place short rearrange for at most 8 elements when AVX512BW is not
// supported. A byte shuffle (pshufb) is used, so $shuffle is presumably the
// byte-level shuffle produced by loadShuffleS.
23444 instruct rearrangeS(vec dst, vec shuffle) %{
23445 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23446 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23447 match(Set dst (VectorRearrange dst shuffle));
23448 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23449 ins_encode %{
23450 assert(UseSSE >= 4, "required");
23451 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23452 %}
23453 ins_pipe( pipe_slow );
23454 %}
23455
// Short rearrange for exactly 16 elements when AVX512BW is not supported.
// Emits the same five-instruction sequence as rearrangeB_avx: shuffle the
// original and a lane-swapped copy of $src with vpshufb, then blend them using
// a mask derived from $shuffle plus the vector_byte_shufflemask() constant.
23456 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
23457 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23458 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23459 match(Set dst (VectorRearrange src shuffle));
23460 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23461 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23462 ins_encode %{
23463 assert(UseAVX >= 2, "required");
23464 // Swap src into vtmp1
23465 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23466 // Shuffle swapped src to get entries from other 128 bit lane
23467 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23468 // Shuffle original src to get entries from self 128 bit lane
23469 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23470 // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23471 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23472 // Perform the blend
23473 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23474 %}
23475 ins_pipe( pipe_slow );
23476 %}
23477
// Short rearrange for any vector length when AVX512BW is supported: a single
// vpermw.
23478 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
23479 predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23480 VM_Version::supports_avx512bw());
23481 match(Set dst (VectorRearrange src shuffle));
23482 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23483 ins_encode %{
23484 int vlen_enc = vector_length_encoding(this);
// Without AVX512VL the 512-bit encoding is forced regardless of the node's
// own vector length (presumably because narrower EVEX forms need VL).
23485 if (!VM_Version::supports_avx512vl()) {
23486 vlen_enc = Assembler::AVX_512bit;
23487 }
23488 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23489 %}
23490 ins_pipe( pipe_slow );
23491 %}
23492
23493 // LoadShuffle/Rearrange for Integer and Float
23494
// Builds a byte-level shuffle from a 4-element int/float shuffle on SSE-only
// configurations (UseAVX == 0). $vtmp accumulates the replicated, scaled
// index; the final result is formed in $dst by adding $vtmp to the constant
// loaded from vector_int_shufflemask().
23495 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
23496 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23497 Matcher::vector_length(n) == 4 && UseAVX == 0);
23498 match(Set dst (VectorLoadShuffle src));
23499 effect(TEMP dst, TEMP vtmp);
23500 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23501 ins_encode %{
23502 assert(UseSSE >= 4, "required");
23503
23504 // Create a byte shuffle mask from int shuffle mask
23505 // only byte shuffle instruction available on these platforms
23506
23507 // Duplicate and multiply each shuffle by 4
23508 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23509 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23510 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23511 __ psllw($vtmp$$XMMRegister, 2);
23512
23513 // Duplicate again to create 4 copies of byte index
23514 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23515 __ psllw($dst$$XMMRegister, 8);
// Note: the OR result lands in $vtmp here; $dst is reloaded below.
23516 __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23517
23518 // Add 3,2,1,0 to get alternate byte index
23519 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23520 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23521 %}
23522 ins_pipe( pipe_slow );
23523 %}
23524
// In-place int/float rearrange on SSE-only configurations (UseAVX == 0).
// Uses pshufb, so $shuffle is presumably the byte-level shuffle built by
// loadShuffleI.
23525 instruct rearrangeI(vec dst, vec shuffle) %{
23526 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23527 UseAVX == 0);
23528 match(Set dst (VectorRearrange dst shuffle));
23529 format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23530 ins_encode %{
23531 assert(UseSSE >= 4, "required");
23532 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23533 %}
23534 ins_pipe( pipe_slow );
23535 %}
23536
// Int/float rearrange when any AVX level is enabled. Instruction selection is
// delegated to vector_rearrange_int_float, which receives the element type and
// the length encoding of this node.
23537 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
23538 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23539 UseAVX > 0);
23540 match(Set dst (VectorRearrange src shuffle));
23541 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23542 ins_encode %{
23543 int vlen_enc = vector_length_encoding(this);
23544 BasicType bt = Matcher::vector_element_basic_type(this);
23545 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23546 %}
23547 ins_pipe( pipe_slow );
23548 %}
23549
23550 // LoadShuffle/Rearrange for Long and Double
23551
// Builds a double-word shuffle from a long/double shuffle for vectors with
// fewer than 8 elements when AVX512VL is not supported. Each 64-bit index is
// doubled, copied into the upper 32 bits of its lane, and the constant at
// vector_long_shufflemask() is added with a 32-bit add.
23552 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23553 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23554 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23555 match(Set dst (VectorLoadShuffle src));
23556 effect(TEMP dst, TEMP vtmp);
23557 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23558 ins_encode %{
23559 assert(UseAVX >= 2, "required");
23560
23561 int vlen_enc = vector_length_encoding(this);
23562 // Create a double word shuffle mask from long shuffle mask
23563 // only double word shuffle instruction available on these platforms
23564
23565 // Multiply each shuffle by two to get double word index
23566 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23567
23568 // Duplicate each double word shuffle
23569 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23570 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23571
23572 // Add one to get alternate double word index
23573 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23574 %}
23575 ins_pipe( pipe_slow );
23576 %}
23577
// Long/double rearrange for fewer than 8 elements when AVX512VL is not
// supported. Uses the double-word permute vpermd; the predicate is identical
// to loadShuffleL's, so $shuffle is presumably the double-word shuffle that
// rule produces.
23578 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23579 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23580 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23581 match(Set dst (VectorRearrange src shuffle));
23582 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23583 ins_encode %{
23584 assert(UseAVX >= 2, "required");
23585
23586 int vlen_enc = vector_length_encoding(this);
23587 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23588 %}
23589 ins_pipe( pipe_slow );
23590 %}
23591
// Long/double rearrange for 8-element vectors, or for any length when
// AVX512VL is supported: a single quad-word permute (vpermq).
23592 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23593 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23594 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23595 match(Set dst (VectorRearrange src shuffle));
23596 format %{ "vector_rearrange $dst, $shuffle, $src" %}
23597 ins_encode %{
23598 assert(UseAVX > 2, "required");
23599
23600 int vlen_enc = vector_length_encoding(this);
// A 128-bit encoding is widened to 256-bit before emitting vpermq
// (presumably because vpermq has no 128-bit form -- confirm against the ISA reference).
23601 if (vlen_enc == Assembler::AVX_128bit) {
23602 vlen_enc = Assembler::AVX_256bit;
23603 }
23604 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23605 %}
23606 ins_pipe( pipe_slow );
23607 %}
23608
23609 // --------------------------------- FMA --------------------------------------
23610 // a * b + c
23611
// Packed-float fused multiply-add, register operands: c = a * b + c.
// $c is both the addend input and the result (see the match rule).
23612 instruct vfmaF_reg(vec a, vec b, vec c) %{
23613 match(Set c (FmaVF c (Binary a b)));
23614 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23615 ins_cost(150);
23616 ins_encode %{
23617 assert(UseFMA, "not enabled");
23618 int vlen_enc = vector_length_encoding(this);
23619 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23620 %}
23621 ins_pipe( pipe_slow );
23622 %}
23623
// Packed-float fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when the vector type of input 1 is
// wider than 8 bytes.
23624 instruct vfmaF_mem(vec a, memory b, vec c) %{
23625 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23626 match(Set c (FmaVF c (Binary a (LoadVector b))));
23627 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23628 ins_cost(150);
23629 ins_encode %{
23630 assert(UseFMA, "not enabled");
23631 int vlen_enc = vector_length_encoding(this);
23632 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23633 %}
23634 ins_pipe( pipe_slow );
23635 %}
23636
// Packed-double fused multiply-add, register operands: c = a * b + c.
// $c is both the addend input and the result (see the match rule).
23637 instruct vfmaD_reg(vec a, vec b, vec c) %{
23638 match(Set c (FmaVD c (Binary a b)));
23639 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23640 ins_cost(150);
23641 ins_encode %{
23642 assert(UseFMA, "not enabled");
23643 int vlen_enc = vector_length_encoding(this);
23644 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23645 %}
23646 ins_pipe( pipe_slow );
23647 %}
23648
// Packed-double fused multiply-add with the second multiplicand loaded from
// memory: c = a * [b] + c. Only matched when the vector type of input 1 is
// wider than 8 bytes.
23649 instruct vfmaD_mem(vec a, memory b, vec c) %{
23650 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23651 match(Set c (FmaVD c (Binary a (LoadVector b))));
23652 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23653 ins_cost(150);
23654 ins_encode %{
23655 assert(UseFMA, "not enabled");
23656 int vlen_enc = vector_length_encoding(this);
23657 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23658 %}
23659 ins_pipe( pipe_slow );
23660 %}
23661
23662 // --------------------------------- Vector Multiply Add --------------------------------------
23663
// MulAddVS2VI on SSE-only configurations (UseAVX == 0): two-operand pmaddwd
// with $dst as both the first input and the result.
23664 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23665 predicate(UseAVX == 0);
23666 match(Set dst (MulAddVS2VI dst src1));
23667 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23668 ins_encode %{
23669 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23670 %}
23671 ins_pipe( pipe_slow );
23672 %}
23673
// MulAddVS2VI when AVX is enabled: three-operand vpmaddwd with a separate
// destination register.
23674 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23675 predicate(UseAVX > 0);
23676 match(Set dst (MulAddVS2VI src1 src2));
23677 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23678 ins_encode %{
23679 int vlen_enc = vector_length_encoding(this);
23680 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23681 %}
23682 ins_pipe( pipe_slow );
23683 %}
23684
23685 // --------------------------------- Vector Multiply Add Add ----------------------------------
23686
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into a single evpdpwssd when
// AVX512_VNNI is supported; $dst is the accumulator input and the result.
// The low ins_cost(10) presumably makes the matcher prefer this fused rule
// over matching the multiply-add and the add separately.
23687 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23688 predicate(VM_Version::supports_avx512_vnni());
23689 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23690 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23691 ins_encode %{
23692 assert(UseAVX > 2, "required");
23693 int vlen_enc = vector_length_encoding(this);
23694 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23695 %}
23696 ins_pipe( pipe_slow );
23697 ins_cost(10);
23698 %}
23699
23700 // --------------------------------- PopCount --------------------------------------
23701
// Unmasked vector population count (PopCountVI / PopCountVL) for element
// types accepted by is_vector_popcount_predicate. The element type and length
// encoding are taken from the $src operand. k0 is passed as the opmask
// argument (presumably meaning "no masking" -- confirm against
// vector_popcount_integral_evex).
23702 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23703 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23704 match(Set dst (PopCountVI src));
23705 match(Set dst (PopCountVL src));
23706 format %{ "vector_popcount_integral $dst, $src" %}
23707 ins_encode %{
23709 int vlen_enc = vector_length_encoding(this, $src);
23710 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23711 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23712 %}
23713 ins_pipe( pipe_slow );
23714 %}
23715
// Masked vector population count (PopCountVI / PopCountVL with an opmask
// operand) for element types accepted by is_vector_popcount_predicate.
23716 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23717 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23718 match(Set dst (PopCountVI src mask));
23719 match(Set dst (PopCountVL src mask));
23720 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23721 ins_encode %{
23722 int vlen_enc = vector_length_encoding(this, $src);
23723 BasicType bt = Matcher::vector_element_basic_type(this, $src);
// $src is copied into $dst before the masked popcount, presumably so lanes
// not selected by $mask keep the source value -- confirm against the helper.
23724 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23725 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23726 %}
23727 ins_pipe( pipe_slow );
23728 %}
23729
// Fallback vector population count (PopCountVI / PopCountVL) for element
// types rejected by is_vector_popcount_predicate. Delegates to
// vector_popcount_integral, which needs two vector temporaries and one
// general-purpose temporary.
23730 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23731 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23732 match(Set dst (PopCountVI src));
23733 match(Set dst (PopCountVL src));
23734 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23735 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23736 ins_encode %{
23738 int vlen_enc = vector_length_encoding(this, $src);
23739 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23740 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23741 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23742 %}
23743 ins_pipe( pipe_slow );
23744 %}
23745
23746 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23747
// CountTrailingZerosV for element type / vector size combinations accepted by
// is_clz_non_subword_predicate_evex. Only one vector temporary is needed; the
// other three vector-temporary slots of the helper receive xnoreg and k0 is
// passed as the opmask argument.
23748 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23749 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23750 Matcher::vector_length_in_bytes(n->in(1))));
23751 match(Set dst (CountTrailingZerosV src));
23752 effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23753 ins_cost(400);
// Format uses the same "\t! using ... as TEMP" layout as vcount_trailing_zeros_reg_avx.
23754 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23755 ins_encode %{
23756 int vlen_enc = vector_length_encoding(this, $src);
23757 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23758 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23759 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23760 %}
23761 ins_pipe( pipe_slow );
23762 %}
23763
// CountTrailingZerosV for T_SHORT elements when AVX512CD is supported and
// either AVX512VL is supported or the vector is 64 bytes wide. Three vector
// temporaries are used; the third vector-temporary slot of the helper receives
// xnoreg and k0 is passed as the opmask argument.
23764 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23765 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23766 VM_Version::supports_avx512cd() &&
23767 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23768 match(Set dst (CountTrailingZerosV src));
23769 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23770 ins_cost(400);
// Format uses the same "\t! using ... as TEMP" layout as vcount_trailing_zeros_reg_avx.
23771 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23772 ins_encode %{
23773 int vlen_enc = vector_length_encoding(this, $src);
23774 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23775 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23776 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23777 %}
23778 ins_pipe( pipe_slow );
23779 %}
23780
// CountTrailingZerosV for T_BYTE elements when both AVX512VL and AVX512BW are
// supported (supports_avx512vlbw). All four vector-temporary slots of the
// helper are used, together with an opmask temporary and a general-purpose
// temporary.
23781 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23782 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23783 match(Set dst (CountTrailingZerosV src));
23784 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23785 ins_cost(400);
// Format uses the same "\t! using ... as TEMP" layout as vcount_trailing_zeros_reg_avx.
23786 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23787 ins_encode %{
23788 int vlen_enc = vector_length_encoding(this, $src);
23789 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23790 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23791 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23792 $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23793 %}
23794 ins_pipe( pipe_slow );
23795 %}
23796
// CountTrailingZerosV fallback when AVX512VL is not supported and the source
// vector is narrower than 64 bytes. Delegates to
// vector_count_trailing_zeros_avx with three vector temporaries and one
// general-purpose temporary. No ins_cost is specified here, unlike the EVEX
// variants, which use ins_cost(400).
23797 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23798 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23799 match(Set dst (CountTrailingZerosV src));
23800 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23801 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23802 ins_encode %{
23803 int vlen_enc = vector_length_encoding(this, $src);
23804 BasicType bt = Matcher::vector_element_basic_type(this, $src);
23805 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23806 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23807 %}
23808 ins_pipe( pipe_slow );
23809 %}
23810
23811
23812 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23813
// Three-input bitwise logic (MacroLogicV) with all vector operands in
// registers. $dst is the first input and the result; $func is the 8-bit
// immediate handed to vpternlogd.
23814 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23815 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23816 effect(TEMP dst);
23817 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23818 ins_encode %{
23819 int vector_len = vector_length_encoding(this);
23820 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23821 %}
23822 ins_pipe( pipe_slow );
23823 %}
23824
// Three-input bitwise logic (MacroLogicV) with the third input loaded from
// memory. Only matched when the vector type reached via n->in(1)->in(1) is
// wider than 8 bytes.
23825 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23826 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23827 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23828 effect(TEMP dst);
23829 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23830 ins_encode %{
23831 int vector_len = vector_length_encoding(this);
23832 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23833 %}
23834 ins_pipe( pipe_slow );
23835 %}
23836
23837 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate by an 8-bit immediate. Matches both RotateLeftV and
// RotateRightV; the ideal opcode is forwarded to vprotate_imm, which
// presumably uses it to choose the rotate direction.
23838 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23839 match(Set dst (RotateLeftV src shift));
23840 match(Set dst (RotateRightV src shift));
23841 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23842 ins_encode %{
23843 int opcode = this->ideal_Opcode();
23844 int vector_len = vector_length_encoding(this);
23845 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23846 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23847 %}
23848 ins_pipe( pipe_slow );
23849 %}
23850
// Vector rotate by a per-lane count held in a vector register. Matches both
// RotateLeftV and RotateRightV; the ideal opcode is forwarded to vprotate_var,
// which presumably uses it to choose the rotate direction.
23851 instruct vprorate(vec dst, vec src, vec shift) %{
23852 match(Set dst (RotateLeftV src shift));
23853 match(Set dst (RotateRightV src shift));
23854 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23855 ins_encode %{
23856 int opcode = this->ideal_Opcode();
23857 int vector_len = vector_length_encoding(this);
23858 BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23859 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23860 %}
23861 ins_pipe( pipe_slow );
23862 %}
23863
23864 // ---------------------------------- Masked Operations ------------------------------------
// Masked vector load where the mask is held in a vector register, i.e. input 3
// of the node is not typed as a predicate-register mask (pvectmask).
// Emitted through vmovmask with this node's element type.
23865 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23866 predicate(!n->in(3)->bottom_type()->isa_pvectmask());
23867 match(Set dst (LoadVectorMasked mem mask));
23868 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23869 ins_encode %{
23870 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23871 int vlen_enc = vector_length_encoding(this);
23872 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23873 %}
23874 ins_pipe( pipe_slow );
23875 %}
23876
23877
// Masked vector load where the mask is an opmask register, i.e. input 3 of the
// node is typed as a pvectmask. Emitted through evmovdqu; the boolean argument
// is false here and true in vmasked_store_evex (presumably the merge flag --
// confirm against evmovdqu's signature).
23878 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23879 predicate(n->in(3)->bottom_type()->isa_pvectmask());
23880 match(Set dst (LoadVectorMasked mem mask));
23881 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23882 ins_encode %{
23883 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23884 int vector_len = vector_length_encoding(this);
23885 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23886 %}
23887 ins_pipe( pipe_slow );
23888 %}
23889
// Masked vector store where the mask is held in a vector register (the mask
// input, n->in(3)->in(2), is not typed as a pvectmask).
23890 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23891 predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23892 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23893 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23894 ins_encode %{
// Element type and length encoding are taken from the node feeding $src,
// not from this store node.
23895 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23896 int vlen_enc = vector_length_encoding(src_node);
23897 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23898 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23899 %}
23900 ins_pipe( pipe_slow );
23901 %}
23902
// Masked vector store where the mask is an opmask register (the mask input,
// n->in(3)->in(2), is typed as a pvectmask). Emitted through evmovdqu with the
// boolean argument set to true.
23903 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23904 predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23905 match(Set mem (StoreVectorMasked mem (Binary src mask)));
23906 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23907 ins_encode %{
// Element type and length encoding are taken from the node feeding $src,
// not from this store node.
23908 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23909 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
23910 int vlen_enc = vector_length_encoding(src_node);
23911 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23912 %}
23913 ins_pipe( pipe_slow );
23914 %}
23915
// Alignment check for a vector memory address: tests $addr against the
// immediate $mask and stops the VM with a diagnostic message if any masked bit
// is set. $addr passes through unchanged; the flags register is clobbered by
// the test.
23916 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23917 match(Set addr (VerifyVectorAlignment addr mask));
23918 effect(KILL cr);
23919 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23920 ins_encode %{
23921 Label Lskip;
23922 // check if masked bits of addr are zero
23923 __ testq($addr$$Register, $mask$$constant);
23924 __ jccb(Assembler::equal, Lskip);
23925 __ stop("verify_vector_alignment found a misaligned vector memory access");
23926 __ bind(Lskip);
23927 %}
23928 ins_pipe(pipe_slow);
23929 %}
23930
// Masked vector equality comparison producing an int result.
// $dst is preset to -1 and kept if every lane selected by $mask compares
// equal. Otherwise $dst receives the count of trailing zero bits of the
// inverted compare result, i.e. the index of the lowest lane whose compare
// bit is clear.
23931 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23932 match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23933 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23934 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23935 ins_encode %{
23936 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23937 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23938
23939 Label DONE;
23940 int vlen_enc = vector_length_encoding(this, $src1);
23941 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
23942
// ktmp2 = ~mask: bits for the lanes that are NOT selected.
23943 __ knotql($ktmp2$$KRegister, $mask$$KRegister);
// Default result, kept when the branch to DONE below is taken.
23944 __ mov64($dst$$Register, -1L);
// ktmp1 = per-lane equality of src1 and src2, computed under $mask.
23945 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
// kortest sets the carry flag when (ktmp2 | ktmp1) is all ones, i.e. every
// lane is either unselected or equal.
23946 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23947 __ jccb(Assembler::carrySet, DONE);
// Mismatch path: invert the compare bits and locate the lowest set bit.
23948 __ kmovql($dst$$Register, $ktmp1$$KRegister);
23949 __ notq($dst$$Register);
23950 __ tzcntq($dst$$Register, $dst$$Register);
23951 __ bind(DONE);
23952 %}
23953 ins_pipe( pipe_slow );
23954 %}
23955
23956
// Generates an opmask from a runtime length held in a register by delegating
// to the genmask helper, which uses $temp as scratch. The flags register is
// declared killed.
// NOTE(review): the format string prints "vector_mask_gen32" while the rule
// operates on rRegL operands -- confirm the intended mnemonic.
23957 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23958 match(Set dst (VectorMaskGen len));
23959 effect(TEMP temp, KILL cr);
23960 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23961 ins_encode %{
23962 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23963 %}
23964 ins_pipe( pipe_slow );
23965 %}
23966
// Generates an opmask from a compile-time constant length.
// For a positive length the value right_n_bits(len) (presumably the low `len`
// bits set) is materialized in $temp and moved to $dst; otherwise $dst is
// cleared by XOR-ing it with itself.
23967 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23968 match(Set dst (VectorMaskGen len));
23969 format %{ "vector_mask_gen $len \t! vector mask generator" %}
23970 effect(TEMP temp);
23971 ins_encode %{
23972 if ($len$$constant > 0) {
23973 __ mov64($temp$$Register, right_n_bits($len$$constant));
23974 __ kmovql($dst$$KRegister, $temp$$Register);
23975 } else {
// Non-positive length: produce an all-zero mask without touching $temp.
23976 __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23977 }
23978 %}
23979 ins_pipe( pipe_slow );
23980 %}
23981
// VectorMaskToLong for a mask held in an opmask register (input typed as
// pvectmask). $dst is passed to vector_mask_operation both as the result
// register and in the helper's temporary-register position.
23982 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23983 predicate(n->in(1)->bottom_type()->isa_pvectmask());
23984 match(Set dst (VectorMaskToLong mask));
23985 effect(TEMP dst, KILL cr);
23986 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23987 ins_encode %{
23988 int opcode = this->ideal_Opcode();
23989 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
23990 int mask_len = Matcher::vector_length(this, $mask);
// Mask size in bytes: lane count times the element size of the mask's type.
23991 int mask_size = mask_len * type2aelembytes(mbt);
23992 int vlen_enc = vector_length_encoding(this, $mask);
23993 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
23994 $dst$$Register, mask_len, mask_size, vlen_enc);
23995 %}
23996 ins_pipe( pipe_slow );
23997 %}
23998
// VectorMaskToLong for a mask held in a vector register (input not typed as
// pvectmask). Uses the vector-register overload of vector_mask_operation with
// one vector temporary; $dst also fills the helper's temporary-register slot.
23999 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24000 predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24001 match(Set dst (VectorMaskToLong mask));
24002 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24003 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24004 ins_encode %{
24005 int opcode = this->ideal_Opcode();
24006 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24007 int mask_len = Matcher::vector_length(this, $mask);
24008 int vlen_enc = vector_length_encoding(this, $mask);
24009 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24010 $dst$$Register, mask_len, mbt, vlen_enc);
24011 %}
24012 ins_pipe( pipe_slow );
24013 %}
24014
// VectorMaskToLong applied directly to a VectorStoreMask of a vector-register
// mask: the rule matches both nodes at once and operates on the mask that
// feeds the VectorStoreMask. The $size operand is part of the match but is not
// used by the encoding.
24015 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24016 predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24017 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24018 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24019 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24020 ins_encode %{
24021 int opcode = this->ideal_Opcode();
24022 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24023 int mask_len = Matcher::vector_length(this, $mask);
24024 int vlen_enc = vector_length_encoding(this, $mask);
24025 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24026 $dst$$Register, mask_len, mbt, vlen_enc);
24027 %}
24028 ins_pipe( pipe_slow );
24029 %}
24030
// VectorMaskTrueCount for a mask held in an opmask register (input typed as
// pvectmask). Delegates to vector_mask_operation with a separate
// general-purpose temporary.
24031 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24032 predicate(n->in(1)->bottom_type()->isa_pvectmask());
24033 match(Set dst (VectorMaskTrueCount mask));
24034 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24035 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24036 ins_encode %{
24037 int opcode = this->ideal_Opcode();
24038 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24039 int mask_len = Matcher::vector_length(this, $mask);
// Mask size in bytes: lane count times the element size of the mask's type.
24040 int mask_size = mask_len * type2aelembytes(mbt);
24041 int vlen_enc = vector_length_encoding(this, $mask);
24042 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24043 $tmp$$Register, mask_len, mask_size, vlen_enc);
24044 %}
24045 ins_pipe( pipe_slow );
24046 %}
24047
// VectorMaskTrueCount for a mask held in a vector register (input not typed as
// pvectmask). Uses the vector-register overload of vector_mask_operation with
// one vector temporary and one general-purpose temporary.
24048 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24049 predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24050 match(Set dst (VectorMaskTrueCount mask));
24051 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24052 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24053 ins_encode %{
24054 int opcode = this->ideal_Opcode();
24055 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24056 int mask_len = Matcher::vector_length(this, $mask);
24057 int vlen_enc = vector_length_encoding(this, $mask);
24058 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24059 $tmp$$Register, mask_len, mbt, vlen_enc);
24060 %}
24061 ins_pipe( pipe_slow );
24062 %}
24063
// VectorMaskTrueCount applied directly to a VectorStoreMask of a
// vector-register mask: the rule matches both nodes at once and operates on
// the mask that feeds the VectorStoreMask. The $size operand is part of the
// match but is not used by the encoding.
24064 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24065 predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24066 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24067 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24068 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24069 ins_encode %{
24070 int opcode = this->ideal_Opcode();
24071 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24072 int mask_len = Matcher::vector_length(this, $mask);
24073 int vlen_enc = vector_length_encoding(this, $mask);
24074 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24075 $tmp$$Register, mask_len, mbt, vlen_enc);
24076 %}
24077 ins_pipe( pipe_slow );
24078 %}
24079
// VectorMaskFirstTrue / VectorMaskLastTrue when the mask input has a pvectmask
// bottom type (mask operand is a kReg). Both ideal nodes share this encoding;
// the ideal opcode passed to the macro assembler distinguishes them.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    // Shape information is queried from the mask input.
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    const int enc_vlen = vector_length_encoding(this, $mask);
    // Lane count multiplied by the per-lane byte width.
    const int mask_bytes = lane_cnt * type2aelembytes(lane_bt);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$KRegister, $tmp$$Register,
                             lane_cnt, mask_bytes, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24097
// VectorMaskFirstTrue / VectorMaskLastTrue when the mask input is not a
// pvectmask, so the mask is held in a vector register. Uses a long scratch
// register and a vector scratch register; flags are killed.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape information is queried from the mask input.
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    const int enc_vlen = vector_length_encoding(this, $mask);
    // The ideal opcode tells the macro assembler which of the two nodes matched.
    const int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister,
                             $xtmp$$XMMRegister, $tmp$$Register,
                             lane_cnt, lane_bt, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24114
// VectorMaskFirstTrue / VectorMaskLastTrue applied directly on top of a
// VectorStoreMask. The rule matches both nodes and consumes the
// VectorStoreMask input, which the predicate requires not to be a pvectmask.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    // Shape information is queried from the mask operand of the matched subtree.
    const int lane_cnt = Matcher::vector_length(this, $mask);
    const BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    const int enc_vlen = vector_length_encoding(this, $mask);
    // The ideal opcode tells the macro assembler which of the two nodes matched.
    const int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister,
                             $xtmp$$XMMRegister, $tmp$$Register,
                             lane_cnt, lane_bt, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24131
24132 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV / ExpandV with the mask held in a vector register. Selected when
// AVX512VL is not available and the vector is at most 32 bytes long. Needs two
// vector temporaries (perm, xtmp) and two general-purpose temporaries
// (rtmp, rscratch); flags are killed.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    // The ideal opcode selects between the compress and expand flavours.
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister,
                                   $rtmp$$Register, $rscratch$$Register, $perm$$XMMRegister,
                                   $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24148
// CompressV / ExpandV with the mask in a kReg. Selected when AVX512VL is
// available or the vector is exactly 64 bytes long. No temporaries are needed.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The ideal opcode selects between the compress and expand flavours.
    const int ideal_opc = this->ideal_Opcode();
    // NOTE(review): the literal false is presumably a merge flag -- confirm
    // against the vector_compress_expand declaration.
    __ vector_compress_expand(ideal_opc, $dst$$XMMRegister, $src$$XMMRegister,
                              $mask$$KRegister, false, elem_bt, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24162
// CompressM: compresses a mask held in a kReg into another kReg, using two
// long scratch registers. Flags are killed.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    // There is no predicate on this rule; the input kind is only checked here.
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    const int lane_cnt = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister,
                            $rtmp1$$Register, $rtmp2$$Register, lane_cnt);
  %}
  ins_pipe(pipe_slow);
%}
24174
24175 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24176
// ReverseV (bit reversal) for CPUs without GFNI. Uses two vector temporaries
// and one integer temporary; dst is also declared TEMP.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24190
// ReverseV (bit reversal) for CPUs with GFNI. Needs a single vector temporary;
// dst is also declared TEMP.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    // 64-bit constant materialized through the constant table.
    // NOTE(review): presumably the bit matrix consumed by the GFNI affine
    // instruction -- confirm against vector_reverse_bit_gfni.
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24205
// ReverseBytesV without temporaries. Selected when AVX512BW is available or
// the vector is shorter than 64 bytes. dst is declared TEMP.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24218
// ReverseBytesV for 64-byte vectors on CPUs without AVX512BW (the complement
// of the vreverse_byte_reg predicate). Uses two vector temporaries and one
// integer temporary; dst is also declared TEMP.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24232
24233 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24234
// Unmasked CountLeadingZerosV for the cases accepted by
// is_clz_non_subword_predicate_evex (evaluated on the input element type and
// input vector size). No temporaries are required, so every optional register
// argument of the macro assembler routine is passed as a "no register" value.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    // Shape information is queried from the src input.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int enc_vlen = vector_length_encoding(this, $src);
    // k0 is passed in the mask position of this unmasked form.
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24248
// Masked CountLeadingZerosV for the cases accepted by
// is_clz_non_subword_predicate_evex. The mask is supplied in a kReg.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    // Shape information is queried from the src input.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int enc_vlen = vector_length_encoding(this, $src);
    // dst is first seeded with a full copy of src.
    // NOTE(review): presumably so that lanes not selected by the mask keep the
    // src value under merge masking -- confirm against the macro assembler.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, enc_vlen);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg,
                                       $mask$$KRegister, noreg, true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24263
// Unmasked CountLeadingZerosV for T_SHORT elements. Requires AVX512CD and
// either AVX512VL or a 64-byte vector. Uses two vector temporaries; dst is
// also declared TEMP.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    // Shape information is queried from the src input.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    // Third vector temporary and the general-purpose temporary are not needed
    // by this rule; k0 is passed in the mask position.
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24279
// Unmasked CountLeadingZerosV for T_BYTE elements on CPUs with AVX512VL+BW.
// Uses three vector temporaries, a kReg temporary and a pointer-sized
// general-purpose temporary; dst is also declared TEMP.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    // Shape information is queried from the src input.
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
24294
// CountLeadingZerosV for T_INT elements when AVX512VL is not available and the
// input vector is shorter than 64 bytes. Uses three vector temporaries and no
// general-purpose temporary; dst is also declared TEMP.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    // Shape information is queried from the src input.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int enc_vlen = vector_length_encoding(this, $src);
    // noreg fills the general-purpose temporary slot, which this rule does not allocate.
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, noreg, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24309
// CountLeadingZerosV for every element type other than T_INT when AVX512VL is
// not available and the input vector is shorter than 64 bytes. In addition to
// three vector temporaries this rule allocates a pointer-sized general-purpose
// temporary; dst is also declared TEMP.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    // Shape information is queried from the src input.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const int enc_vlen = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, $rtmp$$Register, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24324
24325 // ---------------------------------- Vector Masked Operations ------------------------------------
24326
// Masked vector add, register second source. dst is both the first source and
// the destination (it appears on both sides of every match rule). One encoding
// serves all six element-type variants.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    // Ideal opcode and element type are forwarded so that evmasked_op can tell
    // the matched variants apart.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24344
// Masked vector add with the second source folded from a LoadVector, so it is
// encoded as a memory operand. dst is both the first source and the
// destination.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    // Ideal opcode and element type are forwarded so that evmasked_op can tell
    // the matched variants apart.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24362
// Masked vector XOR, register second source. dst is both the first source and
// the destination.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24375
// Masked vector XOR with the second source folded from a LoadVector (memory
// operand). dst is both the first source and the destination.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24388
// Masked vector OR, register second source. dst is both the first source and
// the destination.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24401
// Masked vector OR with the second source folded from a LoadVector (memory
// operand). dst is both the first source and the destination.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24414
// Masked vector AND, register second source. dst is both the first source and
// the destination.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24427
// Masked vector AND with the second source folded from a LoadVector (memory
// operand). dst is both the first source and the destination.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24440
// Masked vector subtract, register second source. dst is both the first source
// and the destination. One encoding serves all six element-type variants.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    // Ideal opcode and element type are forwarded so that evmasked_op can tell
    // the matched variants apart.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24458
// Masked vector subtract with the second source folded from a LoadVector
// (memory operand). dst is both the first source and the destination.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    // Ideal opcode and element type are forwarded so that evmasked_op can tell
    // the matched variants apart.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24476
// Masked vector multiply, register second source. dst is both the first source
// and the destination. Covers short, int, long, float and double elements
// (there is no byte variant among the match rules).
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    // Ideal opcode and element type are forwarded so that evmasked_op can tell
    // the matched variants apart.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24493
// Masked vector multiply with the second source folded from a LoadVector
// (memory operand). dst is both the first source and the destination. Covers
// short, int, long, float and double elements.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    // Ideal opcode and element type are forwarded so that evmasked_op can tell
    // the matched variants apart.
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24510
// Masked vector square root for float and double elements. The operation is
// unary: dst is the only vector operand and is passed to evmasked_op in the
// destination and both source positions.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24524
// Masked vector divide for float and double elements, register second source.
// dst is both the first source and the destination.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24538
// Masked vector divide for float and double elements with the second source
// folded from a LoadVector (memory operand). dst is both the first source and
// the destination.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24552
24553
// Masked vector rotate by an 8-bit immediate. Despite the "rol" in the rule
// name it matches both RotateLeftV and RotateRightV; the ideal opcode passed
// to evmasked_op carries the direction.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The rotate amount is passed as a compile-time constant.
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24567
// Masked vector rotate with the rotate amounts in a vector register. Despite
// the "rol" in the rule name it matches both RotateLeftV and RotateRightV; the
// ideal opcode passed to evmasked_op carries the direction.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24581
// Masked vector left shift by an 8-bit immediate wrapped in LShiftCntV, for
// short, int and long elements. dst is both the source and the destination.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The shift amount is passed as a compile-time constant.
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24596
// Masked vector left shift with the shift operand in a vector register, for
// the case where the shift node is NOT a variable shift (is_var_shift() is
// false). Covers short, int and long elements.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The trailing false mirrors the predicate: this rule never handles a
    // variable shift.
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen, false);
  %}
  ins_pipe(pipe_slow);
%}
24612
// Masked vector left shift with the shift operand in a vector register, for
// the case where the shift node IS a variable shift (is_var_shift() is true).
// Covers short, int and long elements.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The trailing true mirrors the predicate: this rule only handles a
    // variable shift.
    // NOTE(review): the first literal true is presumably the merge-masking
    // flag -- confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen, true);
  %}
  ins_pipe(pipe_slow);
%}
24628
// Masked vector RShiftV (RShiftVS / RShiftVI / RShiftVL) by an 8-bit immediate
// wrapped in RShiftCntV. dst is both the source and the destination.
instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The shift amount is passed as a compile-time constant.
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24643
// Masked vector RShiftV with the shift operand in a vector register, for the
// case where the shift node is NOT a variable shift (is_var_shift() is false).
// Covers short, int and long elements.
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The trailing false mirrors the predicate: this rule never handles a
    // variable shift.
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen, false);
  %}
  ins_pipe(pipe_slow);
%}
24659
// Masked vector RShiftV with the shift operand in a vector register, for the
// case where the shift node IS a variable shift (is_var_shift() is true).
// Covers short, int and long elements.
instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The trailing true mirrors the predicate: this rule only handles a
    // variable shift.
    // NOTE(review): the first literal true is presumably the merge-masking
    // flag -- confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen, true);
  %}
  ins_pipe(pipe_slow);
%}
24675
// Masked vector URShiftV (URShiftVS / URShiftVI / URShiftVL) by an 8-bit
// immediate. Note that the count is wrapped in RShiftCntV, the same count node
// used by the RShiftV immediate rule. dst is both source and destination.
instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The shift amount is passed as a compile-time constant.
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $shift$$constant,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24690
// Masked vector URShiftV with the shift operand in a vector register, for the
// case where the shift node is NOT a variable shift (is_var_shift() is false).
// Covers short, int and long elements.
instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The trailing false mirrors the predicate: this rule never handles a
    // variable shift.
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen, false);
  %}
  ins_pipe(pipe_slow);
%}
24706
// Masked vector URShiftV with the shift operand in a vector register, for the
// case where the shift node IS a variable shift (is_var_shift() is true).
// Covers short, int and long elements.
instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // The trailing true mirrors the predicate: this rule only handles a
    // variable shift.
    // NOTE(review): the first literal true is presumably the merge-masking
    // flag -- confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen, true);
  %}
  ins_pipe(pipe_slow);
%}
24722
// Masked vector MaxV, register second source. dst is both the first source and
// the destination.
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24735
// Masked vector MaxV with the second source folded from a LoadVector (memory
// operand). dst is both the first source and the destination.
instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int enc_vlen = vector_length_encoding(this);
    // NOTE(review): the literal true is presumably the merge-masking flag --
    // confirm against the evmasked_op declaration.
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, enc_vlen);
  %}
  ins_pipe(pipe_slow);
%}
24748
// Masked vector minimum with both inputs in vector registers; dst is the
// first input and receives the result.
instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24761
// Masked vector minimum whose second input is a LoadVector folded into the
// instruction as a memory operand.
instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24774
// Masked VectorRearrange. Note that, unlike the neighbouring masked rules,
// the boolean handed to evmasked_op here is false.
instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   false, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24787
// Masked absolute value for byte, short, int and long vectors. The operation
// is unary, so dst is passed for every vector register argument.
instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    XMMRegister vreg = $dst$$XMMRegister;
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   vreg, vreg, vreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24803
// Masked fused multiply-add for float and double vectors with all three
// inputs in vector registers. dst is the first input and the destination.
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24818
// Masked fused multiply-add for float and double vectors where the third
// input is a LoadVector folded into the instruction as a memory operand.
instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, $src3$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24833
// Masked vector compare that writes its result into an opmask register.
// The element type of src1 selects the compare instruction; cond carries the
// BoolTest predicate as an 8-bit immediate.
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
    // Width and element type are taken from the first compare input rather
    // than from this node, whose own type is a predicate mask.
    const BasicType cmp_bt = Matcher::vector_element_basic_type(this, $src1);
    const int vec_enc = vector_length_encoding(this, $src1);

    // Dispatch on the element type of the compared vectors.
    switch (cmp_bt) {
      // Integral lanes: translate the BoolTest predicate and tell the
      // assembler whether the comparison is signed.
      case T_BYTE: {
        Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      case T_SHORT: {
        Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      case T_INT: {
        Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      case T_LONG: {
        Assembler::ComparisonPredicate pred = booltest_pred_to_comparison_pred($cond$$constant);
        const bool is_signed = !Matcher::is_unsigned_booltest_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, pred, is_signed, vec_enc);
        break;
      }
      // Floating point lanes use the FP predicate translation and carry no
      // signedness flag.
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP fp_pred = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, fp_pred, vec_enc);
        break;
      }
      default:
        assert(false, "%s", type2name(cmp_bt));
        break;
    }
  %}
  ins_pipe( pipe_slow );
%}
24883
// MaskAll from a 32-bit general purpose register into an opmask register,
// for masks of at most 32 lanes.
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    KRegister dst_mask = $dst$$KRegister;
    Register src_gpr = $src$$Register;
    __ vector_maskall_operation(dst_mask, src_gpr, lane_cnt);
  %}
  ins_pipe( pipe_slow );
%}
24894
// Mask negation, matched as XorVMask against MaskAll(-1), for masks shorter
// than 8 lanes when AVX512DQ is available. Needs one general purpose and one
// opmask temporary.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    KRegister tmp_mask = $ktmp$$KRegister;
    Register tmp_gpr = $rtmp$$Register;
    __ knot(lane_cnt, $dst$$KRegister, $src$$KRegister, tmp_mask, tmp_gpr);
  %}
  ins_pipe( pipe_slow );
%}
24906
// Mask negation, matched as XorVMask against MaskAll(-1), that needs no
// temporaries. Applies to 8 lanes with AVX512DQ, to exactly 16 lanes, and to
// more than 16 lanes with AVX512BW.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    KRegister dst_mask = $dst$$KRegister;
    KRegister src_mask = $src$$KRegister;
    __ knot(lane_cnt, dst_mask, src_mask);
  %}
  ins_pipe( pipe_slow );
%}
24919
// VectorLongToMask producing a vector-register mask (the node type is not a
// predicate mask) for at most 8 lanes. No vector temporary is supplied to the
// helper in this variant; xnoreg is passed in its place.
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register, xnoreg,
                              lane_cnt, vector_length_encoding(lane_cnt));
  %}
  ins_pipe( pipe_slow );
%}
24933
24934
// VectorLongToMask producing a vector-register mask (the node type is not a
// predicate mask) for more than 8 lanes. Uses an extra vector temporary and
// kills the flags register.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    assert(lane_cnt <= 32, "invalid mask length");
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register,
                              $rtmp1$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                              lane_cnt, vector_length_encoding(lane_cnt));
  %}
  ins_pipe( pipe_slow );
%}
24949
// VectorLongToMask when the result type is a predicate mask: the long is
// simply moved into the opmask register.
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_pvectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    KRegister dst_mask = $dst$$KRegister;
    Register src_gpr = $src$$Register;
    __ kmov(dst_mask, src_gpr);
  %}
  ins_pipe( pipe_slow );
%}
24959
// Bitwise AND, OR and XOR on two opmask registers.
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    // Debug-only sanity check: both mask inputs must carry the same type.
    const MachNode* lhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* rhs_mask = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(lhs_mask->bottom_type(), rhs_mask->bottom_type()), "Mask types must be equal");

    // Without AVX512DQ, masks shorter than 16 lanes are handled as 16 lanes.
    uint lane_cnt = Matcher::vector_length(this);
    if (lane_cnt < 16 && !VM_Version::supports_avx512dq()) {
      lane_cnt = 16;
    }
    __ masked_op(this->ideal_Opcode(), lane_cnt, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
24976
// Masked MacroLogicV with all three inputs in vector registers. func is the
// 8-bit immediate passed to evpternlog; dst is the first input and the
// destination.
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24988
// Masked MacroLogicV whose third input is declared as a memory operand.
// NOTE(review): unlike the other *_mem_masked rules in this section, the match
// rule does not wrap src3 in LoadVector - confirm that this is intended.
instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address,
                  true, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25000
// CastVV on an opmask register: emits no code, has zero size and zero cost.
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25011
// CastVV on a vec register: emits no code, has zero size and zero cost.
instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25022
// CastVV on a legVec register: emits no code, has zero size and zero cost.
instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  ins_cost(0);
  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
25033
// IsInfiniteF: classify the float with vfpclassss using class immediate 0x18
// into an opmask temporary, then copy that mask into the integer result.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    KRegister class_mask = $ktmp$$KRegister;
    __ vfpclassss(class_mask, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, class_mask);
  %}
  ins_pipe(pipe_slow);
%}
25045
// IsInfiniteD: classify the double with vfpclasssd using class immediate 0x18
// into an opmask temporary, then copy that mask into the integer result.
instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    KRegister class_mask = $ktmp$$KRegister;
    __ vfpclasssd(class_mask, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, class_mask);
  %}
  ins_pipe(pipe_slow);
%}
25057
// Signed saturating add/sub for subword element types, register operands.
instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The false argument corresponds to the signed predicate above.
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25073
// Unsigned saturating add/sub for subword element types, register operands.
instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The true argument corresponds to the unsigned predicate above.
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister,
                            true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25089
// Signed saturating add/sub for non-subword element types. Selected for
// 64-byte vectors, or for any size when AVX512VL is available. Uses two
// vector and two opmask temporaries.
instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), lane_bt,
                                        $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister,
                                        vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25109
// Signed saturating add/sub for non-subword element types on vectors of at
// most 32 bytes when AVX512VL is not available. Uses four vector temporaries.
instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), lane_bt,
                                       $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                       vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25128
// Unsigned saturating add for non-subword element types. Selected for 64-byte
// vectors, or for any size when AVX512VL is available. Uses two vector
// temporaries and one opmask temporary.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_evex(lane_bt,
                                              $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25145
// Unsigned saturating add for non-subword element types on vectors of at most
// 32 bytes when AVX512VL is not available. Uses three vector temporaries.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_add_dq_saturating_unsigned_avx(lane_bt,
                                             $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                                             vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25162
// Unsigned saturating subtract for non-subword element types. Selected for
// 64-byte vectors, or for any size when AVX512VL is available. Uses a single
// opmask temporary.
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_evex(lane_bt,
                                              $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $ktmp$$KRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25179
// Unsigned saturating subtract for non-subword element types on vectors of at
// most 32 bytes when AVX512VL is not available. Uses two vector temporaries.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ vector_sub_dq_saturating_unsigned_avx(lane_bt,
                                             $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                             vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25196
// Signed saturating add/sub for subword element types where the second input
// is a LoadVector folded into the instruction as a memory operand.
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The false argument corresponds to the signed predicate above.
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address,
                            false, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25212
// Unsigned saturating add/sub for subword element types where the second
// input is a LoadVector folded into the instruction as a memory operand.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // The true argument corresponds to the unsigned predicate above.
    __ vector_saturating_op(this->ideal_Opcode(), lane_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address,
                            true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25228
// Masked signed saturating add/sub for subword element types, register
// operands. dst is the first input and the destination.
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // First boolean is false to match the signed predicate above.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister,
                              false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25243
// Masked unsigned saturating add/sub for subword element types, register
// operands. dst is the first input and the destination.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // First boolean is true to match the unsigned predicate above.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister,
                              true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25258
// Masked signed saturating add/sub for subword element types where the second
// input is a LoadVector folded into the instruction as a memory operand.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // First boolean is false to match the signed predicate above.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$Address,
                              false, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25273
// Masked unsigned saturating add/sub for subword element types where the
// second input is a LoadVector folded into the instruction as a memory operand.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // First boolean is true to match the unsigned predicate above.
    __ evmasked_saturating_op(this->ideal_Opcode(), lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$Address,
                              true, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
25288
// SelectFromTwoVector: the index vector register is both an input and the
// destination of the result.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ select_from_two_vectors_evex(lane_bt, $index$$XMMRegister,
                                    $src1$$XMMRegister, $src2$$XMMRegister,
                                    vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25300
// ReinterpretS2HF: move the value from a general purpose register into an XMM
// register with evmovw.
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register src_gpr = $src$$Register;
    __ evmovw(dst_xmm, src_gpr);
  %}
  ins_pipe(pipe_slow);
%}
25310
// ReinterpretHF2S: move the value from an XMM register into a general purpose
// register with evmovw, then narrow the result as a T_SHORT.
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    Register dst_gpr = $dst$$Register;
    __ evmovw(dst_gpr, $src$$XMMRegister);
    __ narrow_subword_type(dst_gpr, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}
25321
// Fuses ConvF2HF followed by ReinterpretS2HF into one vcvtps2ph, so the value
// stays in an XMM register.
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ vcvtps2ph(dst_xmm, src_xmm, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25331
// Fuses ReinterpretHF2S followed by ConvHF2F into one vcvtph2ps, so the value
// stays in an XMM register.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ vcvtph2ps(dst_xmm, src_xmm, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
25341
// Scalar half-precision square root (SqrtHF), emitted as vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ vsqrtsh(dst_xmm, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
25351
// Scalar half-precision add, divide, multiply and subtract. The ideal opcode
// is forwarded so that efp16sh can pick the operation.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    __ efp16sh(this->ideal_Opcode(), $dst$$XMMRegister,
               $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
25365
// Scalar half-precision min/max when AVX10.2 is supported. No temporaries are
// needed; k0 is passed as the opmask argument.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    __ sminmax_fp16_avx10_2(this->ideal_Opcode(), $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, k0);
  %}
  ins_pipe( pipe_slow );
%}
25379
// Scalar half-precision min/max when AVX10.2 is not supported. Uses one
// opmask temporary and two XMM temporaries.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);

  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ sminmax_fp16(this->ideal_Opcode(), $dst$$XMMRegister,
                    $src1$$XMMRegister, $src2$$XMMRegister,
                    $ktmp$$KRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25395
// Scalar half-precision fused multiply-add. As the format string documents,
// dst = dst * src1 + src2; note that the assembler call receives src2 ahead
// of src1.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc_xmm = $dst$$XMMRegister;
    __ vfmadd132sh(acc_xmm, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
25406
25407
// Vector half-precision square root (SqrtVHF) on a register input.
instruct vector_sqrt_HF_reg(vec dst, vec src)
%{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister dst_vec = $dst$$XMMRegister;
    XMMRegister src_vec = $src$$XMMRegister;
    __ evsqrtph(dst_vec, src_vec, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25418
// Vector half-precision square root (SqrtVHF) whose input is a
// VectorReinterpret of a LoadVector, folded in as a memory operand.
instruct vector_sqrt_HF_mem(vec dst, memory src)
%{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister dst_vec = $dst$$XMMRegister;
    __ evsqrtph(dst_vec, $src$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25429
// Vector half-precision add, divide, multiply and subtract on register
// inputs. The ideal opcode is forwarded so that evfp16ph can pick the
// operation.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ evfp16ph(this->ideal_Opcode(), $dst$$XMMRegister,
                $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25444
25445
// Vector half-precision add, divide, multiply and subtract where the second
// input is a VectorReinterpret of a LoadVector, folded in as a memory operand.
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
%{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    __ evfp16ph(this->ideal_Opcode(), $dst$$XMMRegister,
                $src1$$XMMRegister, $src2$$Address, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
25460
// Packed FP16 fused multiply-add, all operands in registers.
// dst is both an input and the result: as the format string states,
// $dst = $dst * $src1 + $src2.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister accum      = $dst$$XMMRegister;   // multiplicand on entry, result on exit
    XMMRegister multiplier = $src1$$XMMRegister;
    XMMRegister addend     = $src2$$XMMRegister;
    const int vec_len      = vector_length_encoding(this);
    // The addend is passed ahead of the multiplier.
    __ evfmadd132ph(accum, addend, multiplier, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25471
// Packed FP16 fused multiply-add with the multiplier taken from memory.
// dst is both an input and the result: as the format string states,
// $dst = $dst * $src1 + $src2, where $src1 is a reinterpreted LoadVector.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
%{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister accum  = $dst$$XMMRegister;   // multiplicand on entry, result on exit
    XMMRegister addend = $src2$$XMMRegister;
    const int vec_len  = vector_length_encoding(this);
    // The addend is passed ahead of the memory multiplier.
    __ evfmadd132ph(accum, addend, $src1$$Address, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25482
// Packed FP16 min/max with a memory second operand, AVX10.2 flavour.
// Only selectable when VM_Version::supports_avx10_2() is true. Handles both
// MinVHF and MaxVHF; the ideal opcode tells the helper which one to emit.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vec_len  = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    // k0 is handed over as the KRegister argument.
    // NOTE(review): presumably this means "no masking" - confirm against the helper.
    __ vminmax_fp16_avx10_2(ideal_op, $dst$$XMMRegister, lhs, $src2$$Address, k0, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25497
// Packed FP16 min/max on two register operands, AVX10.2 flavour.
// Only selectable when VM_Version::supports_avx10_2() is true. Handles both
// MinVHF and MaxVHF; the ideal opcode tells the helper which one to emit.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
%{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vec_len  = vector_length_encoding(this);
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    // k0 is handed over as the KRegister argument.
    // NOTE(review): presumably this means "no masking" - confirm against the helper.
    __ vminmax_fp16_avx10_2(ideal_op, $dst$$XMMRegister, lhs, rhs, k0, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25512
// Packed FP16 min/max on two register operands for targets without AVX10.2
// (the predicate is the negation of VM_Version::supports_avx10_2()).
// Needs one opmask temporary and two vector temporaries; dst is declared
// TEMP_DEF.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
%{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int vec_len  = vector_length_encoding(this);
    XMMRegister first  = $src2$$XMMRegister;
    XMMRegister second = $src1$$XMMRegister;
    // The helper receives $src2 ahead of $src1.
    // NOTE(review): presumably deliberate for the helper's operand contract - confirm.
    __ vminmax_fp16(ideal_op, $dst$$XMMRegister, first, second, $ktmp$$KRegister,
                    $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vec_len);
  %}
  ins_pipe(pipe_slow);
%}
25528
25529 //----------PEEPHOLE RULES-----------------------------------------------------
25530 // These must follow all instruction definitions as they use the names
25531 // defined in the instructions definitions.
25532 //
25533 // peeppredicate ( rule_predicate );
//   // the peephole rule is ignored unless this predicate holds
25535 //
25536 // peepmatch ( root_instr_name [preceding_instruction]* );
25537 //
25538 // peepprocedure ( procedure_name );
//   // provide a procedure name to perform the optimization, the procedure should
//   // reside in the architecture dependent peephole file, the method has the
//   // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
//   // with the arguments being the basic block, the current node index inside the
//   // block, the register allocator, a function that, when invoked, returns a new
//   // node of the kind defined in peepreplace, and the rules of the nodes appearing
//   // in the corresponding peepmatch; the function returns true if successful,
//   // else it returns false
25547 //
25548 // peepconstraint %{
25549 // (instruction_number.operand_name relational_op instruction_number.operand_name
25550 // [, ...] );
25551 // // instruction numbers are zero-based using left to right order in peepmatch
25552 //
25553 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
25554 // // provide an instruction_number.operand_name for each operand that appears
25555 // // in the replacement instruction's match rule
25556 //
25557 // ---------VM FLAGS---------------------------------------------------------
25558 //
25559 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25560 //
25561 // Each peephole rule is given an identifying number starting with zero and
25562 // increasing by one in the order seen by the parser. An individual peephole
25563 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25564 // on the command-line.
25565 //
25566 // ---------CURRENT LIMITATIONS----------------------------------------------
25567 //
25568 // Only transformations inside a basic block (do we need more for peephole)
25569 //
25570 // ---------EXAMPLE----------------------------------------------------------
25571 //
25572 // // pertinent parts of existing instructions in architecture description
25573 // instruct movI(rRegI dst, rRegI src)
25574 // %{
25575 // match(Set dst (CopyI src));
25576 // %}
25577 //
25578 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25579 // %{
25580 // match(Set dst (AddI dst src));
25581 // effect(KILL cr);
25582 // %}
25583 //
25584 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25585 // %{
25586 // match(Set dst (AddI dst src));
25587 // %}
25588 //
25589 // 1. Simple replacement
25590 // - Only match adjacent instructions in same basic block
25591 // - Only equality constraints
25592 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25593 // - Only one replacement instruction
25594 //
25595 // // Change (inc mov) to lea
25596 // peephole %{
25597 // // lea should only be emitted when beneficial
25598 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25599 // // increment preceded by register-register move
25600 // peepmatch ( incI_rReg movI );
25601 // // require that the destination register of the increment
25602 // // match the destination register of the move
25603 // peepconstraint ( 0.dst == 1.dst );
25604 // // construct a replacement instruction that sets
25605 // // the destination to ( move's source register + one )
25606 // peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25607 // %}
25608 //
25609 // 2. Procedural replacement
//    - More flexible finding of relevant nodes
25611 // - More flexible constraints
25612 // - More flexible transformations
25613 // - May utilise architecture-dependent API more effectively
25614 // - Currently only one replacement instruction due to adlc parsing capabilities
25615 //
25616 // // Change (inc mov) to lea
25617 // peephole %{
25618 // // lea should only be emitted when beneficial
25619 // peeppredicate( VM_Version::supports_fast_2op_lea() );
25620 // // the rule numbers of these nodes inside are passed into the function below
25621 // peepmatch ( incI_rReg movI );
25622 // // the method that takes the responsibility of transformation
25623 // peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node when
//   // invoked is passed into the function above
25626 // peepreplace ( leaI_rReg_immI() );
25627 // %}
25628
// These instructions are not matched by the matcher but are used by the peephole rules
// 32-bit "dst = src1 + src2" expressed as a leal.
// predicate(false) keeps the matcher from ever selecting this rule, so the
// node only comes into existence as a peephole replacement.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register first  = $src1$$Register;
    Register second = $src2$$Register;
    // rbp and r13 are kept out of the base slot: when src1 is one of them the
    // two registers trade places (scale is 1, so the sum is unchanged).
    // NOTE(review): presumably because rbp/r13 as base force an extra
    // displacement byte in the encoding - confirm.
    if (first == rbp || first == r13) {
      assert(second != rbp && second != r13, "");
      __ leal(result, Address(second, first, Address::times_1));
    } else {
      __ leal(result, Address(first, second, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25648
// 32-bit "dst = src1 + constant" expressed as a leal with a displacement.
// predicate(false) keeps the matcher from ever selecting this rule, so the
// node only comes into existence as a peephole replacement.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base   = $src1$$Register;
    // The immediate becomes the displacement of the address operand.
    __ leal(result, Address(base, $src2$$constant));
  %}
  ins_pipe( ialu_reg_reg );
%}
25659
// 32-bit "dst = src << shift" expressed as a leal, with the shift count used
// as the address scale factor.
// predicate(false) keeps the matcher from ever selecting this rule, so the
// node only comes into existence as a peephole replacement.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal $dst, [$src << $shift]" %}
  ins_encode %{
    Register result  = $dst$$Register;
    Register operand = $src$$Register;
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    if (factor != Address::times_2 || operand == rbp || operand == r13) {
      // General form: no base register, index scaled by the shift amount.
      __ leal(result, Address(noreg, operand, factor));
    } else {
      // A doubling is emitted as operand + operand instead, provided the
      // operand is neither rbp nor r13.
      __ leal(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25676
// 64-bit "dst = src1 + src2" expressed as a leaq.
// predicate(false) keeps the matcher from ever selecting this rule, so the
// node only comes into existence as a peephole replacement.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register first  = $src1$$Register;
    Register second = $src2$$Register;
    // rbp and r13 are kept out of the base slot: when src1 is one of them the
    // two registers trade places (scale is 1, so the sum is unchanged).
    // NOTE(review): presumably because rbp/r13 as base force an extra
    // displacement byte in the encoding - confirm.
    if (first == rbp || first == r13) {
      assert(second != rbp && second != r13, "");
      __ leaq(result, Address(second, first, Address::times_1));
    } else {
      __ leaq(result, Address(first, second, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25695
// 64-bit "dst = src1 + constant" expressed as a leaq with a displacement.
// predicate(false) keeps the matcher from ever selecting this rule, so the
// node only comes into existence as a peephole replacement.
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base   = $src1$$Register;
    // The immediate becomes the displacement of the address operand.
    __ leaq(result, Address(base, $src2$$constant));
  %}
  ins_pipe( ialu_reg_reg );
%}
25706
// 64-bit "dst = src << shift" expressed as a leaq, with the shift count used
// as the address scale factor.
// predicate(false) keeps the matcher from ever selecting this rule, so the
// node only comes into existence as a peephole replacement.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq $dst, [$src << $shift]" %}
  ins_encode %{
    Register result  = $dst$$Register;
    Register operand = $src$$Register;
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    if (factor != Address::times_2 || operand == rbp || operand == r13) {
      // General form: no base register, index scaled by the shift amount.
      __ leaq(result, Address(noreg, operand, factor));
    } else {
      // A doubling is emitted as operand + operand instead, provided the
      // operand is neither rbp nor r13.
      __ leaq(result, Address(operand, operand, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25723
25724 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25725 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25726 // processors with at least partial ALU support for lea
25727 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25728 // beneficial for processors with full ALU support
25729 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25730
// Peephole rooted at addI_rReg (32-bit register-register add).
// Enabled only when VM_Version::supports_fast_2op_lea() holds. The
// lea_coalesce_reg procedure (defined outside this file section) performs the
// transformation, using leaI_rReg_rReg_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}
25738
// Peephole rooted at addI_rReg_imm (32-bit add of an immediate).
// Enabled only when VM_Version::supports_fast_2op_lea() holds. The
// lea_coalesce_imm procedure (defined outside this file section) performs the
// transformation, using leaI_rReg_immI_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25746
// Peephole rooted at incI_rReg (32-bit increment).
// Enabled when VM_Version::supports_fast_3op_lea() holds or the CPU is an
// Intel Cascade Lake. The lea_coalesce_imm procedure (defined outside this
// file section) performs the transformation, using leaI_rReg_immI_peep as the
// replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25755
// Peephole rooted at decI_rReg (32-bit decrement).
// Enabled when VM_Version::supports_fast_3op_lea() holds or the CPU is an
// Intel Cascade Lake. The lea_coalesce_imm procedure (defined outside this
// file section) performs the transformation, using leaI_rReg_immI_peep as the
// replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}
25764
// Peephole rooted at salI_rReg_immI2 (32-bit left shift by an immI2 count).
// Enabled only when VM_Version::supports_fast_2op_lea() holds. The
// lea_coalesce_imm procedure (defined outside this file section) performs the
// transformation, using leaI_rReg_immI2_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}
25772
// Peephole rooted at addL_rReg (64-bit register-register add).
// Enabled only when VM_Version::supports_fast_2op_lea() holds. The
// lea_coalesce_reg procedure (defined outside this file section) performs the
// transformation, using leaL_rReg_rReg_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}
25780
// Peephole rooted at addL_rReg_imm (64-bit add of an immediate).
// Enabled only when VM_Version::supports_fast_2op_lea() holds. The
// lea_coalesce_imm procedure (defined outside this file section) performs the
// transformation, using leaL_rReg_immL32_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25788
// Peephole rooted at incL_rReg (64-bit increment).
// Enabled when VM_Version::supports_fast_3op_lea() holds or the CPU is an
// Intel Cascade Lake. The lea_coalesce_imm procedure (defined outside this
// file section) performs the transformation, using leaL_rReg_immL32_peep as
// the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25797
// Peephole rooted at decL_rReg (64-bit decrement).
// Enabled when VM_Version::supports_fast_3op_lea() holds or the CPU is an
// Intel Cascade Lake. The lea_coalesce_imm procedure (defined outside this
// file section) performs the transformation, using leaL_rReg_immL32_peep as
// the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_3op_lea() ||
                VM_Version::is_intel_cascade_lake());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}
25806
// Peephole rooted at salL_rReg_immI2 (64-bit left shift by an immI2 count).
// Enabled only when VM_Version::supports_fast_2op_lea() holds. The
// lea_coalesce_imm procedure (defined outside this file section) performs the
// transformation, using leaL_rReg_immI2_peep as the replacement node.
peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}
25814
// Peephole rooted at leaPCompressedOopOffset.
// Has neither a peeppredicate nor a peepreplace: the lea_remove_redundant
// procedure (defined outside this file section) is given the matched node and
// carries out the whole transformation itself.
peephole
%{
  peepmatch (leaPCompressedOopOffset);
  peepprocedure (lea_remove_redundant);
%}
25820
// Peephole rooted at leaP8Narrow.
// Has neither a peeppredicate nor a peepreplace: the lea_remove_redundant
// procedure (defined outside this file section) is given the matched node and
// carries out the whole transformation itself.
peephole
%{
  peepmatch (leaP8Narrow);
  peepprocedure (lea_remove_redundant);
%}
25826
// Peephole rooted at leaP32Narrow.
// Has neither a peeppredicate nor a peepreplace: the lea_remove_redundant
// procedure (defined outside this file section) is given the matched node and
// carries out the whole transformation itself.
peephole
%{
  peepmatch (leaP32Narrow);
  peepprocedure (lea_remove_redundant);
%}
25832
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25835
// int variant: peephole rooted at testI_reg.
// No peeppredicate and no peepreplace; the test_may_remove procedure (defined
// outside this file section) is given the matched test node and decides
// whether it can be dropped.
peephole
%{
  peepmatch (testI_reg);
  peepprocedure (test_may_remove);
%}
25842
// long variant: peephole rooted at testL_reg.
// No peeppredicate and no peepreplace; the test_may_remove procedure (defined
// outside this file section) is given the matched test node and decides
// whether it can be dropped.
peephole
%{
  peepmatch (testL_reg);
  peepprocedure (test_may_remove);
%}
25849
25850
25851 //----------SMARTSPILL RULES---------------------------------------------------
25852 // These must follow all instruction definitions as they use the names
25853 // defined in the instructions definitions.